1 Process

1.1 Clean and remove adaptadors

#QC filter, delete nextera adapter and low quality
bash src/03.trimgalore.sh
#Remove adapters
bash src/05.cutadapt

1.2 Infer ASVs

ASVs were inferred from each sequencing run separately with DADA2 in R

The code for each run is found in:

2022 src/Dada2_2022.html 2023 src/Dada2_2023.html

2 Post process

2.1 Filter by frequence

Import outputs from DADA2 to QIIME2

    #Create output results dir
    mkdir -p results/02.QIIME2

#Import run 2022 files

    #import fasta seqs
    qiime tools import --input-path results/01.Dada2/2022/ASVs_2022.fasta --type 'FeatureData[Sequence]' --output-path results/02.QIIME2/ASV_rep_seq_2022.qza

    # append missing header to the table for import
    cat <(echo -n "#OTU Table") results/01.Dada2/2022/ASV_to_seqs-nochim_2022.tsv > results/02.QIIME2/temp.txt

    # convert to biom
    biom convert -i results/02.QIIME2/temp.txt -o results/02.QIIME2/temp.biom --table-type="OTU table" --to-hdf5

    #create feature table
    qiime tools import --input-path results/02.QIIME2/temp.biom --type 'FeatureTable[Frequency]' --input-format BIOMV210Format --output-path results/02.QIIME2/ASV_table_2022.qza

    #Get visual summarizes
    qiime feature-table tabulate-seqs \
    --i-data results/02.QIIME2/ASV_rep_seq_2022.qza \
    --o-visualization results/02.QIIME2/ASV_rep_seq_2022.qzv

    qiime feature-table summarize \
    --i-table results/02.QIIME2/ASV_table_2022.qza \
    --o-visualization results/02.QIIME2/ASV_table_2022.qzv

#Import run 2023 files

    #import fasta seqs
    qiime tools import --input-path results/01.Dada2/2023/ASVs_2023.fasta --type 'FeatureData[Sequence]' --output-path results/02.QIIME2/ASV_rep_seq_2023.qza

    # append missing header to the table for import
    cat <(echo -n "#OTU Table") results/01.Dada2/2023/ASV_to_seqs-nochim_2023.tsv > results/02.QIIME2/temp2.txt

    # convert to biom
    biom convert -i results/02.QIIME2/temp2.txt -o results/02.QIIME2/temp2.biom --table-type="OTU table" --to-hdf5

    #create feature table
    qiime tools import --input-path results/02.QIIME2/temp2.biom --type 'FeatureTable[Frequency]' --input-format BIOMV210Format --output-path results/02.QIIME2/ASV_table_2023.qza

    #Get visual summarizes
    qiime feature-table tabulate-seqs \
    --i-data results/02.QIIME2/ASV_rep_seq_2023.qza \
    --o-visualization results/02.QIIME2/ASV_rep_seq_2023.qzv

    qiime feature-table summarize \
    --i-table results/02.QIIME2/ASV_table_2023.qza \
    --o-visualization results/02.QIIME2/ASV_table_2023.qzv

2.2 Filter by frequence

Here I removed all ASVs with a frequency of less than 0.1% of the mean sample depth. This cut-off excludes ASVs that are likely due to MiSeq bleed-through between runs (reported by Illumina to be 0.1% of reads). To calculate this cut-off I identified the mean sample depth, multiplied it by 0.001, and rounded to the nearest integer. This step are describe in this paper

# 2022
#Filter feature table by frequence
qiime feature-table filter-features --i-table  results/02.QIIME2/ASV_table_2022.qza --p-min-samples 1 --p-min-frequency 52 --o-filtered-table results/02.QIIME2/ASV_table_2022_freq52_1minsamp.qza

#Get visual feature table summary
qiime feature-table summarize --i-table results/02.QIIME2/ASV_table_2022_freq52_1minsamp.qza --o-visualization results/02.QIIME2/ASV_table_2022_freq52_1minsamp.qzv

#Filter rep-seqs 2022
qiime feature-table filter-seqs  --i-table results/02.QIIME2/ASV_table_2022_freq52_1minsamp.qza --i-data results/02.QIIME2/ASV_rep_seq_2022.qza --o-filtered-data results/02.QIIME2/ASV_rep_seq_2022_filter-freq52.qza

# 2023
#Filter feature table by frequence
qiime feature-table filter-features --i-table  results/02.QIIME2/ASV_table_2023.qza --p-min-samples 1 --p-min-frequency 97 --o-filtered-table results/02.QIIME2/ASV_table_2023_freq97_1minsamp.qza

#Get visual feature table summary
qiime feature-table summarize --i-table results/02.QIIME2/ASV_table_2023_freq97_1minsamp.qza --o-visualization results/02.QIIME2/ASV_table_2023_freq97_1minsamp.qzv

#Filter rep-seqs 2023
qiime feature-table filter-seqs  --i-table results/02.QIIME2/ASV_table_2023_freq97_1minsamp.qza --i-data results/02.QIIME2/ASV_rep_seq_2023.qza --o-filtered-data results/02.QIIME2/ASV_rep_seq_2023_filter-freq97.qza

2.3 Merge sequences and feature tables

#Merge rep seqs
qiime feature-table merge-seqs  --i-data results/02.QIIME2/ASV_rep_seq_2022_filter-freq52.qza results/02.QIIME2/ASV_rep_seq_2023_filter-freq97.qza   --o-merged-data results/02.QIIME2/merged-rep_seqs_filters_freq52-97.qza

#Merge feature tables
qiime feature-table merge --i-tables results/02.QIIME2/ASV_table_2022_freq52_1minsamp.qza results/02.QIIME2/ASV_table_2023_freq97_1minsamp.qza --p-overlap-method sum --o-merged-table results/02.QIIME2/merged-table-filters-freq52-97.qza

#Get visual summarizes
qiime feature-table tabulate-seqs --i-data results/02.QIIME2/merged-rep_seqs_filters_freq52-97.qza --o-visualization results/02.QIIME2/merged-rep_seqs_filters_freq52-97.qzv

qiime feature-table summarize --i-table results/02.QIIME2/merged-table-filters-freq52-97.qza --o-visualization results/02.QIIME2/merged-table-filters-freq52-97.qzv

3 Taxonomy assignment

#Tax classification
qiime feature-classifier classify-sklearn --i-classifier data/dbs/classifier_silva_138_trained.qza --i-reads results/02.QIIME2/merged-rep_seqs_filters_freq52-97.qza --o-classification results/02.QIIME2/taxonomy_merge.qza --p-n-jobs 80

#get visualization
qiime metadata tabulate --m-input-file results/02.QIIME2/taxonomy_merge.qza --o-visualization results/02.QIIME2/taxonomy_merge.qzv

4 Filter by taxonomy

qiime taxa filter-table --i-table results/02.QIIME2/merged-table-filters-freq52-97.qza --i-taxonomy results/02.QIIME2/taxonomy_merge.qza --p-exclude Archaea,Eukaryota,Mitochondria,Chloroplast --p-include p__ --o-filtered-table results/02.QIIME2/ASV_table_filter_freq_merge_aemc.qza

qiime feature-table summarize --i-table results/02.QIIME2/ASV_table_filter_freq_merge_aemc.qza --o-visualization results/02.QIIME2/ASV_table_filter_freq_merge_aemc.qzv

5 Filter summary

Type ASVs Frequency Samples
2022 29302 2677260 53
2023 19143 3035299 30
2022-filter-freq 6316 2410664 53
2023-filter-freq 2935 2754795 30
Merge-fltr-freq 6316 5165459 83
Mrg-fltr-freq-aemc 6261 5084303 83

Remove in fasta seqs

#remove in fasta sequences
qiime feature-table filter-seqs  --i-table results/02.QIIME2/ASV_table_filter_freq_merge_aemc.qza --i-data results/02.QIIME2/merged-rep_seqs_filters_freq52-97.qza --o-filtered-data results/02.QIIME2/ASV_rep_seq_filters_freq_merge_aemc.qza

6 Phylogeny

nohup qiime phylogeny align-to-tree-mafft-iqtree \
--p-n-threads auto --i-sequences results/02.QIIME2/ASV_rep_seq_filters_freq_merge_aemc.qza \
--o-alignment results/02.QIIME2/align-mft-iqt.qza \
--o-masked-alignment results/02.QIIME2/masked-align-iqtree.qza \
--o-tree results/02.QIIME2/unrooted-tree-iqtree.qza \
--o-rooted-tree results/02.QIIME2/rooted-tree-iqtree.qza --verbose > outs/07.phylogeny-iqtree.nohup &
[1] 2166700

7 Get info per sample

#taxonomy
qiime tools export --input-path results/02.QIIME2/taxonomy_merge.qza --output-path results/02.QIIME2/exports/taxonomy

#feature table
qiime tools export --input-path results/02.QIIME2/ASV_table_filter_freq_merge_aemc.qza --output-path results/02.QIIME2/exports/ASV_table

#reformat taxonomy tsv
sed -i -e '1 s/Feature/#Feature/' -e '1 s/Taxon/taxonomy/' results/02.QIIME2/exports/taxonomy/taxonomy.tsv

#Add taxonomy to feature table
biom add-metadata -i results/02.QIIME2/exports/ASV_table/feature-table.biom -o results/02.QIIME2/exports/feature-table_tax.biom --observation-metadata-fp results/02.QIIME2/exports/taxonomy/taxonomy.tsv --sc-separated results/02.QIIME2/exports/taxonomy/taxonomy.tsv

#Convert to tsv from biom format
biom convert -i results/02.QIIME2/exports/feature-table_tax.biom -o results/02.QIIME2/exports/feature-table_tax.tsv --to-tsv --header-key taxonomy

#Get effective reads per sample
biom summarize-table -i results/02.QIIME2/exports/feature-table_tax.biom -o results/02.QIIME2/exports/feature-table_tax_reads_per_sample_summary.txt

#Get ASVs per sample
biom summarize-table -i results/02.QIIME2/exports/feature-table_tax.biom --qualitative -o results/02.QIIME2/exports/feature-table_tax_ASVs_per_sample_summary.txt

8 Data Prepare

load("src/Diversity.RData")

8.1 Filter samples

It is recommended to choose specimens that have at least two compartments. This will enable comparison between them.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Filter
metadata_fltr <- metadata %>%
  group_by(Specimen) %>%
  filter(n() >= 2) %>%
  ungroup()

#save table
write.table(metadata_fltr, "results/03.Diversity/Metadata_fltr.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)

Compare

dim(metadata)
## [1] 83 13
dim(metadata_fltr)
## [1] 72 13
library(ggplot2)
library(ggsci)
library(cowplot)

#all

# factor to reorder plot
metadata$Location <- factor(metadata$Location)
metadata$Source <- factor(metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
metadata$Specimen <- factor(metadata$Specimen)

# plot
samples_plot_all <- ggplot(metadata, aes(x = Specimen, y = ..count.., fill = Source)) +
  geom_bar(position = "dodge") +
  facet_wrap(~ Location, scales = "free_x") + 
  labs(title = "Samples by Location and Compartment", y = "Number of Samples", x = "Specimen") +
  theme_bw() + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) + # Rotate labels 
  scale_fill_jco()


# fltr

# factor to reorder plot
metadata_fltr$Location <- factor(metadata_fltr$Location)
metadata_fltr$Source <- factor(metadata_fltr$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
metadata_fltr$Specimen <- factor(metadata_fltr$Specimen)

#plot
samples_plot_fltr <- ggplot(metadata_fltr, aes(x = Specimen, y = ..count.., fill = Source)) +
  geom_bar(position = "dodge") +
  facet_wrap(~ Location, scales = "free_x") + 
  labs(title = "Samples by Location and Compartment", y = "Number of Samples", x = "Specimen") +
  theme_bw() + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) + # Rotate labels 
  scale_fill_jco()

samples_distribution_plot <- plot_grid(samples_plot_all, samples_plot_fltr, labels = c("A", "B"), ncol = 1, rel_heights = c(1, 1))
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#save plot
ggsave("results/plots/02.diversity/01.samples_distribution.png", samples_distribution_plot , width = 8, height = 7)

# show 
samples_distribution_plot

# Filter otu table

# Create vector with identifiers of samples to keep
samples_to_keep <- metadata_fltr$`sample-id`

# Filter OTU table 
otu_table_fltr <- otu_table[, samples_to_keep, drop = FALSE]

# check
dim(otu_table)
## [1] 6261   83
dim(otu_table_fltr)
## [1] 6261   72

8.2 Create and filter Phyloseq

library(phyloseq)
library(qiime2R)

#create phyloseq object

ps <- qza_to_phyloseq(
  features = "results/02.QIIME2/merged-table-filters-freq52-97.qza",
  tree = "results/02.QIIME2/rooted-tree-iqtree.qza",
  taxonomy = "results/02.QIIME2/taxonomy_merge.qza",
  metadata = "data/metadata.tsv")
library(phyloseq)
library(dplyr)

# Filter samples according previous filters
# Extract metadata
meta_data <- as.data.frame(sample_data(ps))

# Filter
filtered_meta_data <- meta_data %>%
  group_by(Specimen) %>%
  mutate(n_compartments = n_distinct(Source)) %>%
  filter(n_compartments >= 2) %>%
  ungroup()

# Empty check point
if(nrow(filtered_meta_data) == 0) {
  stop("No samples meet the criteria")
}

# get samples
sample_ids_to_keep <- filtered_meta_data$sample

# Sample of interest check point
if(length(sample_ids_to_keep) == 0) {
  stop("No valid sample IDs were found. Check your metadata processing steps.")
}

# Prune samples from the phyloseq object
ps2 <- prune_samples(sample_ids_to_keep, ps)

# show
print(ps)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 6261 taxa and 83 samples ]
## sample_data() Sample Data:       [ 83 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 6261 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 6261 tips and 6260 internal nodes ]
print(ps2)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 6261 taxa and 72 samples ]
## sample_data() Sample Data:       [ 72 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 6261 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 6261 tips and 6260 internal nodes ]

8.3 Clean taxonomy table

library(devtools)
## Loading required package: usethis
library(microbiome)
## 
## microbiome R package (microbiome.github.com)
##     
## 
## 
##  Copyright (C) 2011-2022 Leo Lahti, 
##     Sudarshan Shetty et al. <microbiome.github.io>
## 
## Attaching package: 'microbiome'
## The following object is masked from 'package:ggplot2':
## 
##     alpha
## The following object is masked from 'package:base':
## 
##     transform
library(microbiomeutilities)
## Warning: replacing previous import 'ggplot2::alpha' by 'microbiome::alpha' when
## loading 'microbiomeutilities'
## 
## Attaching package: 'microbiomeutilities'
## The following object is masked from 'package:microbiome':
## 
##     add_refseq
#check data
microbiome::summarize_phyloseq(ps2)
## Compositional = NO2
## 1] Min. number of reads = 238902] Max. number of reads = 1738693] Total number of reads = 47393804] Average number of reads = 65824.72222222225] Median number of reads = 59703.57] Sparsity = 0.7991002502262686] Any OTU sum to 1 or less? YES8] Number of singletons = 1009] Percent of OTUs that are singletons 
##         (i.e. exactly one read detected across all samples)010] Number of sample variables are: 12sampleSourceDateYearLocationWeekRaw.readsColect_numberSpecimenquant_readingEffective_readsASVs2
## [[1]]
## [1] "1] Min. number of reads = 23890"
## 
## [[2]]
## [1] "2] Max. number of reads = 173869"
## 
## [[3]]
## [1] "3] Total number of reads = 4739380"
## 
## [[4]]
## [1] "4] Average number of reads = 65824.7222222222"
## 
## [[5]]
## [1] "5] Median number of reads = 59703.5"
## 
## [[6]]
## [1] "7] Sparsity = 0.799100250226268"
## 
## [[7]]
## [1] "6] Any OTU sum to 1 or less? YES"
## 
## [[8]]
## [1] "8] Number of singletons = 100"
## 
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n        (i.e. exactly one read detected across all samples)0"
## 
## [[10]]
## [1] "10] Number of sample variables are: 12"
## 
## [[11]]
##  [1] "sample"          "Source"          "Date"            "Year"           
##  [5] "Location"        "Week"            "Raw.reads"       "Colect_number"  
##  [9] "Specimen"        "quant_reading"   "Effective_reads" "ASVs"

Remove singletons

#Delete singletons
ps3 <- filter_taxa(ps2, function(x) sum(x) > 1, TRUE)

Remove Phylum with low prevalence

# Explore prevalence
## Get prevalence
prevdf = apply(X = otu_table(ps3),
               MARGIN = ifelse(taxa_are_rows(ps3), yes = 1, no = 2),
               FUN = function(x){sum(x > 0)})
## Add taxonomy
prevdf = data.frame(Prevalence = prevdf,
                    TotalAbundance = taxa_sums(ps3),
                    tax_table(ps3))
## Check prevalence at Phylum level
plyr::ddply(prevdf, "Phylum", function(df1){cbind(mean(df1$Prevalence),sum(df1$Prevalence))})
plyr::ddply(prevdf, "Genus", function(df1){cbind(mean(df1$Prevalence),sum(df1$Prevalence))})
# tax to filter
filtertaxa = c("Elusimicrobiota", "GAL15")

# filter
ps4 = subset_taxa(ps3, !Phylum %in% filtertaxa)

Clean names

#check names
#head(microbiome::abundances(ps_fltr))
head(phyloseq::tax_table(ps4))
## Taxonomy Table:     [6 taxa by 7 taxonomic ranks]:
##          Kingdom       Phylum            Class           Order             
## ASV_1034 "d__Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_1428 "d__Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_4955 "d__Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_2133 "d__Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_4953 "d__Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_6141 "d__Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
##          Family              Genus               Species                   
## ASV_1034 "WD2101_soil_group" "WD2101_soil_group" "uncultured_planctomycete"
## ASV_1428 "WD2101_soil_group" "WD2101_soil_group" NA                        
## ASV_4955 "WD2101_soil_group" "WD2101_soil_group" NA                        
## ASV_2133 "WD2101_soil_group" "WD2101_soil_group" "uncultured_bacterium"    
## ASV_4953 "WD2101_soil_group" "WD2101_soil_group" "uncultured_planctomycete"
## ASV_6141 "WD2101_soil_group" "WD2101_soil_group" NA
#Next, cleaning the "d__" and similar values.

# extending gsub to entire table 
tax_table(ps4)[, colnames(tax_table(ps4))] <- gsub(tax_table(ps4)[, colnames(tax_table(ps4))],     pattern = "[a-z]__", replacement = "")

#change ambiguities

tax_table(ps4)[tax_table(ps4) == "uncultured_bacterium"] <- NA
tax_table(ps4)[tax_table(ps4) == "uncultured"] <- NA
tax_table(ps4)[tax_table(ps4) == "metagenome"] <- NA
  #check
head(phyloseq::tax_table(ps4))
## Taxonomy Table:     [6 taxa by 7 taxonomic ranks]:
##          Kingdom    Phylum            Class           Order             
## ASV_1034 "Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_1428 "Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_4955 "Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_2133 "Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_4953 "Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_6141 "Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
##          Family              Genus               Species                   
## ASV_1034 "WD2101_soil_group" "WD2101_soil_group" "uncultured_planctomycete"
## ASV_1428 "WD2101_soil_group" "WD2101_soil_group" NA                        
## ASV_4955 "WD2101_soil_group" "WD2101_soil_group" NA                        
## ASV_2133 "WD2101_soil_group" "WD2101_soil_group" NA                        
## ASV_4953 "WD2101_soil_group" "WD2101_soil_group" "uncultured_planctomycete"
## ASV_6141 "WD2101_soil_group" "WD2101_soil_group" NA

Agglomerate levels

#Aggregate at genus level.

ps_family <- phyloseq::tax_glom(ps4, "Family", NArm = TRUE)
ps_genus <- phyloseq::tax_glom(ps4, "Genus", NArm = TRUE)
ps_species <- phyloseq::tax_glom(ps4, "Species", NArm = TRUE)
ps_genus <- filter_taxa(ps_genus, function(x) sum(x) > 1, TRUE)

head(phyloseq::tax_table(ps_genus))
## Taxonomy Table:     [6 taxa by 7 taxonomic ranks]:
##          Kingdom    Phylum            Class            Order             
## ASV_519  "Bacteria" "Planctomycetota" "Phycisphaerae"  "Tepidisphaerales"
## ASV_5638 "Bacteria" "Planctomycetota" "Phycisphaerae"  "Phycisphaerales" 
## ASV_4626 "Bacteria" "Planctomycetota" "Phycisphaerae"  "mle1-8"          
## ASV_4159 "Bacteria" "Planctomycetota" "OM190"          "OM190"           
## ASV_498  "Bacteria" "Planctomycetota" "Planctomycetes" "Pirellulales"    
## ASV_765  "Bacteria" "Planctomycetota" "Planctomycetes" "Pirellulales"    
##          Family              Genus               Species
## ASV_519  "WD2101_soil_group" "WD2101_soil_group" NA     
## ASV_5638 "Phycisphaeraceae"  "AKYG587"           NA     
## ASV_4626 "mle1-8"            "mle1-8"            NA     
## ASV_4159 "OM190"             "OM190"             NA     
## ASV_498  "Pirellulaceae"     "Pir4_lineage"      NA     
## ASV_765  "Pirellulaceae"     "Pirellula"         NA

Change id by tax name

#Substitute these IDs with names of genus
#ps_genus <- format_to_besthit(ps_genus) #no me gusta el formato, pero sirve

taxa_names(ps_genus) <- tax_table(ps_genus)[,"Genus"]
head(taxa_names(ps_genus))
## [1] "WD2101_soil_group" "AKYG587"           "mle1-8"           
## [4] "OM190"             "Pir4_lineage"      "Pirellula"
head(phyloseq::tax_table(ps_genus))
## Taxonomy Table:     [6 taxa by 7 taxonomic ranks]:
##                   Kingdom    Phylum            Class           
## WD2101_soil_group "Bacteria" "Planctomycetota" "Phycisphaerae" 
## AKYG587           "Bacteria" "Planctomycetota" "Phycisphaerae" 
## mle1-8            "Bacteria" "Planctomycetota" "Phycisphaerae" 
## OM190             "Bacteria" "Planctomycetota" "OM190"         
## Pir4_lineage      "Bacteria" "Planctomycetota" "Planctomycetes"
## Pirellula         "Bacteria" "Planctomycetota" "Planctomycetes"
##                   Order              Family              Genus              
## WD2101_soil_group "Tepidisphaerales" "WD2101_soil_group" "WD2101_soil_group"
## AKYG587           "Phycisphaerales"  "Phycisphaeraceae"  "AKYG587"          
## mle1-8            "mle1-8"           "mle1-8"            "mle1-8"           
## OM190             "OM190"            "OM190"             "OM190"            
## Pir4_lineage      "Pirellulales"     "Pirellulaceae"     "Pir4_lineage"     
## Pirellula         "Pirellulales"     "Pirellulaceae"     "Pirellula"        
##                   Species
## WD2101_soil_group NA     
## AKYG587           NA     
## mle1-8            NA     
## OM190             NA     
## Pir4_lineage      NA     
## Pirellula         NA

Clean rare characters

# Remove characters
taxa_names(ps_genus) <- gsub(" ", ".", taxa_names(ps_genus))
taxa_names(ps_genus) <- gsub("\\(|\\)", "", taxa_names(ps_genus))

#check
unique(tax_table(ps_genus)[,"Genus"] )
## Taxonomy Table:     [418 taxa by 1 taxonomic ranks]:
##                                                    Genus                                               
## WD2101_soil_group                                  "WD2101_soil_group"                                 
## AKYG587                                            "AKYG587"                                           
## mle1-8                                             "mle1-8"                                            
## OM190                                              "OM190"                                             
## Pir4_lineage                                       "Pir4_lineage"                                      
## Pirellula                                          "Pirellula"                                         
## SH-PL14                                            "SH-PL14"                                           
## Planctomicrobium                                   "Planctomicrobium"                                  
## Schlesneria                                        "Schlesneria"                                       
## Gemmata                                            "Gemmata"                                           
## Fimbriiglobus                                      "Fimbriiglobus"                                     
## Zavarzinella                                       "Zavarzinella"                                      
## Gemmataceae                                        "Gemmataceae"                                       
## Aquisphaera                                        "Aquisphaera"                                       
## Singulisphaera                                     "Singulisphaera"                                    
## Tundrisphaera                                      "Tundrisphaera"                                     
## Latescibacterota                                   "Latescibacterota"                                  
## Latescibacteraceae                                 "Latescibacteraceae"                                
## Terrimicrobium                                     "Terrimicrobium"                                    
## Candidatus_Udaeobacter                             "Candidatus_Udaeobacter"                            
## Candidatus_Xiphinematobacter                       "Candidatus_Xiphinematobacter"                      
## Chthoniobacter                                     "Chthoniobacter"                                    
## Pedosphaeraceae                                    "Pedosphaeraceae"                                   
## ADurb.Bin063-1                                     "ADurb.Bin063-1"                                    
## DEV008                                             "DEV008"                                            
## Ellin517                                           "Ellin517"                                          
## Pedosphaera                                        "Pedosphaera"                                       
## Ellin516                                           "Ellin516"                                          
## Candidatus_Kaiserbacteria                          "Candidatus_Kaiserbacteria"                         
## Opitutus                                           "Opitutus"                                          
## Lacunisphaera                                      "Lacunisphaera"                                     
## Cerasicoccus                                       "Cerasicoccus"                                      
## Subgroup_17                                        "Subgroup_17"                                       
## Subgroup_25                                        "Subgroup_25"                                       
## Subgroup_9                                         "Subgroup_9"                                        
## Vicinamibacteraceae                                "Vicinamibacteraceae"                               
## Luteitalea                                         "Luteitalea"                                        
## Vicinamibacter                                     "Vicinamibacter"                                    
## Edaphobacter                                       "Edaphobacter"                                      
## Terriglobus                                        "Terriglobus"                                       
## Granulicella                                       "Granulicella"                                      
## Acidipila                                          "Acidipila"                                         
## Acidicapsa                                         "Acidicapsa"                                        
## Occallatibacter                                    "Occallatibacter"                                   
## Terracidiphilus                                    "Terracidiphilus"                                   
## Candidatus_Koribacter                              "Candidatus_Koribacter"                             
## Subgroup_13                                        "Subgroup_13"                                       
## Subgroup_5                                         "Subgroup_5"                                        
## Subgroup_11                                        "Subgroup_11"                                       
## Neochlamydia                                       "Neochlamydia"                                      
## Candidatus_Protochlamydia                          "Candidatus_Protochlamydia"                         
## cvE6                                               "cvE6"                                              
## Pla4_lineage                                       "Pla4_lineage"                                      
## Ilumatobacter                                      "Ilumatobacter"                                     
## CL500-29_marine_group                              "CL500-29_marine_group"                             
## Iamia                                              "Iamia"                                             
## IMCC26256                                          "IMCC26256"                                         
## 0319-7L14                                          "0319-7L14"                                         
## Actinomadura                                       "Actinomadura"                                      
## Actinoallomurus                                    "Actinoallomurus"                                   
## Acidothermus                                       "Acidothermus"                                      
## Actinocorallia                                     "Actinocorallia"                                    
## Acrocarpospora                                     "Acrocarpospora"                                    
## Planomonospora                                     "Planomonospora"                                    
## Sphaerisporangium                                  "Sphaerisporangium"                                 
## Streptosporangiaceae                               "Streptosporangiaceae"                              
## Nonomuraea                                         "Nonomuraea"                                        
## Streptosporangium                                  "Streptosporangium"                                 
## Streptomyces                                       "Streptomyces"                                      
## Streptacidiphilus                                  "Streptacidiphilus"                                 
## Kitasatospora                                      "Kitasatospora"                                     
## Jiangella                                          "Jiangella"                                         
## Aeromicrobium                                      "Aeromicrobium"                                     
## Marmoricola                                        "Marmoricola"                                       
## Nocardioides                                       "Nocardioides"                                      
## Microlunatus                                       "Microlunatus"                                      
## Cutibacterium                                      "Cutibacterium"                                     
## Actinopolymorpha                                   "Actinopolymorpha"                                  
## Flindersiella                                      "Flindersiella"                                     
## Kribbella                                          "Kribbella"                                         
## Tenggerimyces                                      "Tenggerimyces"                                     
## Actinospica                                        "Actinospica"                                       
## Catenulispora                                      "Catenulispora"                                     
## Lechevalieria                                      "Lechevalieria"                                     
## Saccharothrix                                      "Saccharothrix"                                     
## Amycolatopsis                                      "Amycolatopsis"                                     
## Actinophytocola                                    "Actinophytocola"                                   
## Kutzneria                                          "Kutzneria"                                         
## Actinomycetospora                                  "Actinomycetospora"                                 
## Pseudonocardia                                     "Pseudonocardia"                                    
## Kibdelosporangium                                  "Kibdelosporangium"                                 
## Hamadaea                                           "Hamadaea"                                          
## Rugosimonospora                                    "Rugosimonospora"                                   
## Catellatospora                                     "Catellatospora"                                    
## Actinoplanes                                       "Actinoplanes"                                      
## Rhizocola                                          "Rhizocola"                                         
## Allocatelliglobosispora                            "Allocatelliglobosispora"                           
## Longispora                                         "Longispora"                                        
## Virgisporangium                                    "Virgisporangium"                                   
## Dactylosporangium                                  "Dactylosporangium"                                 
## Krasilnikovia                                      "Krasilnikovia"                                     
## Luedemannella                                      "Luedemannella"                                     
## Micromonospora                                     "Micromonospora"                                    
## Asanoa                                             "Asanoa"                                            
## Mycobacterium                                      "Mycobacterium"                                     
## Corynebacterium                                    "Corynebacterium"                                   
## Nocardia                                           "Nocardia"                                          
## Rhodococcus                                        "Rhodococcus"                                       
## Jatrophihabitans                                   "Jatrophihabitans"                                  
## Cryptosporangium                                   "Cryptosporangium"                                  
## Frankia                                            "Frankia"                                           
## Crossiella                                         "Crossiella"                                        
## Saccharopolyspora                                  "Saccharopolyspora"                                 
## Frankiales                                         "Frankiales"                                        
## Geodermatophilus                                   "Geodermatophilus"                                  
## Blastococcus                                       "Blastococcus"                                      
## Modestobacter                                      "Modestobacter"                                     
## Nakamurella                                        "Nakamurella"                                       
## Intrasporangium                                    "Intrasporangium"                                   
## Pseudarthrobacter                                  "Pseudarthrobacter"                                 
## Paenarthrobacter                                   "Paenarthrobacter"                                  
## Agromyces                                          "Agromyces"                                         
## Leifsonia                                          "Leifsonia"                                         
## Microbacterium                                     "Microbacterium"                                    
## Curtobacterium                                     "Curtobacterium"                                    
## Cellulomonas                                       "Cellulomonas"                                      
## Isoptericola                                       "Isoptericola"                                      
## Sporichthya                                        "Sporichthya"                                       
## 67-14                                              "67-14"                                             
## Conexibacter                                       "Conexibacter"                                      
## Solirubrobacter                                    "Solirubrobacter"                                   
## Parviterribacter                                   "Parviterribacter"                                  
## Solirubrobacteraceae                               "Solirubrobacteraceae"                              
## Gaiella                                            "Gaiella"                                           
## MB-A2-108                                          "MB-A2-108"                                         
## Rubrobacter                                        "Rubrobacter"                                       
## Gitt-GS-136                                        "Gitt-GS-136"                                       
## KD4-96                                             "KD4-96"                                            
## Thermosporothrix                                   "Thermosporothrix"                                  
## 1959-1                                             "1959-1"                                            
## Ktedonobacter                                      "Ktedonobacter"                                     
## Ktedonobacterales                                  "Ktedonobacterales"                                 
## JG30-KF-AS9                                        "JG30-KF-AS9"                                       
## JG30-KF-CM45                                       "JG30-KF-CM45"                                      
## AKYG1722                                           "AKYG1722"                                          
## FFCH7168                                           "FFCH7168"                                          
## AKIW781                                            "AKIW781"                                           
## C0119                                              "C0119"                                             
## S085                                               "S085"                                              
## OLB14                                              "OLB14"                                             
## JG30-KF-CM66                                       "JG30-KF-CM66"                                      
## SAR202_clade                                       "SAR202_clade"                                      
## TK10                                               "TK10"                                              
## Litorilinea                                        "Litorilinea"                                       
## A4b                                                "A4b"                                               
## OLB13                                              "OLB13"                                             
## UTCFX1                                             "UTCFX1"                                            
## RBG-13-54-9                                        "RBG-13-54-9"                                       
## SBR1031                                            "SBR1031"                                           
## Domibacillus                                       "Domibacillus"                                      
## Brevibacillus                                      "Brevibacillus"                                     
## Geobacillus                                        "Geobacillus"                                       
## Lysinibacillus                                     "Lysinibacillus"                                    
## Kurthia                                            "Kurthia"                                           
## Paenisporosarcina                                  "Paenisporosarcina"                                 
## Staphylococcus                                     "Staphylococcus"                                    
## Tepidibacillus                                     "Tepidibacillus"                                    
## Fictibacillus                                      "Fictibacillus"                                     
## Bacillus                                           "Bacillus"                                          
## Terribacillus                                      "Terribacillus"                                     
## Lactococcus                                        "Lactococcus"                                       
## Ammoniphilus                                       "Ammoniphilus"                                      
## Paenibacillus                                      "Paenibacillus"                                     
## Cohnella                                           "Cohnella"                                          
## Baia                                               "Baia"                                              
## Tumebacillus                                       "Tumebacillus"                                      
## AD3                                                "AD3"                                               
## Sporomusa                                          "Sporomusa"                                         
## Anaerospora                                        "Anaerospora"                                       
## Obscuribacteraceae                                 "Obscuribacteraceae"                                
## Oxynema_BDU_92071                                  "Oxynema_BDU_92071"                                 
## WPS-2                                              "WPS-2"                                             
## Bryobacter                                         "Bryobacter"                                        
## Candidatus_Solibacter                              "Candidatus_Solibacter"                             
## Paraclostridium                                    "Paraclostridium"                                   
## Romboutsia                                         "Romboutsia"                                        
## Clostridium_sensu_stricto_8                        "Clostridium_sensu_stricto_8"                       
## Clostridium_sensu_stricto_12                       "Clostridium_sensu_stricto_12"                      
## Clostridium_sensu_stricto_9                        "Clostridium_sensu_stricto_9"                       
## Clostridium_sensu_stricto_3                        "Clostridium_sensu_stricto_3"                       
## Clostridium_sensu_stricto_13                       "Clostridium_sensu_stricto_13"                      
## Herbinix                                           "Herbinix"                                          
## Anaerocolumna                                      "Anaerocolumna"                                     
## Saccharimonadales                                  "Saccharimonadales"                                 
## TM7a                                               "TM7a"                                              
## LWQ8                                               "LWQ8"                                              
## TM7                                                "TM7"                                               
## Fimbriimonadaceae                                  "Fimbriimonadaceae"                                 
## Armatimonadales                                    "Armatimonadales"                                   
## Kapabacteriales                                    "Kapabacteriales"                                   
## Ruminiclostridium                                  "Ruminiclostridium"                                 
## Anaerobacterium                                    "Anaerobacterium"                                   
## RB41                                               "RB41"                                              
## JGI_0001001-H03                                    "JGI_0001001-H03"                                   
## Blastocatella                                      "Blastocatella"                                     
## Aridibacter                                        "Aridibacter"                                       
## Stenotrophobacter                                  "Stenotrophobacter"                                 
## 11-24                                              "11-24"                                             
## DS-100                                             "DS-100"                                            
## Subgroup_2                                         "Subgroup_2"                                        
## Rokubacteriales                                    "Rokubacteriales"                                   
## WX65                                               "WX65"                                              
## Subgroup_7                                         "Subgroup_7"                                        
## Vermiphilaceae                                     "Vermiphilaceae"                                    
## Subgroup_10                                        "Subgroup_10"                                       
## Subgroup_22                                        "Subgroup_22"                                       
## Subgroup_18                                        "Subgroup_18"                                       
## Entotheonellaceae                                  "Entotheonellaceae"                                 
## Candidatus_Entotheonella                           "Candidatus_Entotheonella"                          
## Gemmatimonas                                       "Gemmatimonas"                                      
## Roseisolibacter                                    "Roseisolibacter"                                   
## S0134_terrestrial_group                            "S0134_terrestrial_group"                           
## BD2-11_terrestrial_group                           "BD2-11_terrestrial_group"                          
## YC-ZSS-LKJ147                                      "YC-ZSS-LKJ147"                                     
## Longimicrobiaceae                                  "Longimicrobiaceae"                                 
## Minicystis                                         "Minicystis"                                        
## Polyangium                                         "Polyangium"                                        
## Eel-36e1D6                                         "Eel-36e1D6"                                        
## BIrii41                                            "BIrii41"                                           
## Phaselicystis                                      "Phaselicystis"                                     
## mle1-27                                            "mle1-27"                                           
## Pajaroellobacter                                   "Pajaroellobacter"                                  
## Haliangium                                         "Haliangium"                                        
## Pseudenhygromyxa                                   "Pseudenhygromyxa"                                  
## Nannocystis                                        "Nannocystis"                                       
## Myxococcus                                         "Myxococcus"                                        
## Archangium                                         "Archangium"                                        
## P3OB-42                                            "P3OB-42"                                           
## Anaeromyxobacter                                   "Anaeromyxobacter"                                  
## Blfdi19                                            "Blfdi19"                                           
## NB1-j                                              "NB1-j"                                             
## OM27_clade                                         "OM27_clade"                                        
## Geobacter                                          "Geobacter"                                         
## Bdellovibrio                                       "Bdellovibrio"                                      
## MBNT15                                             "MBNT15"                                            
## bacteriap25                                        "bacteriap25"                                       
## RCP2-54                                            "RCP2-54"                                           
## Holophaga                                          "Holophaga"                                         
## Babeliales                                         "Babeliales"                                        
## Nitrospira                                         "Nitrospira"                                        
## 4-29-1                                             "4-29-1"                                            
## PMMR1                                              "PMMR1"                                             
## Asticcacaulis                                      "Asticcacaulis"                                     
## Caulobacter                                        "Caulobacter"                                       
## Phenylobacterium                                   "Phenylobacterium"                                  
## A0839                                              "A0839"                                             
## Nordella                                           "Nordella"                                          
## Rhodomicrobium                                     "Rhodomicrobium"                                    
## Pedomicrobium                                      "Pedomicrobium"                                     
## Hyphomicrobium                                     "Hyphomicrobium"                                    
## Labrys                                             "Labrys"                                            
## Phreatobacter                                      "Phreatobacter"                                     
## Amphiplicatus                                      "Amphiplicatus"                                     
## Bosea                                              "Bosea"                                             
## Neo-b11                                            "Neo-b11"                                           
## Methylobacterium-Methylorubrum                     "Methylobacterium-Methylorubrum"                    
## Microvirga                                         "Microvirga"                                        
## FFCH5858                                           "FFCH5858"                                          
## Xanthobacteraceae                                  "Xanthobacteraceae"                                 
## Bauldia                                            "Bauldia"                                           
## alphaI_cluster                                     "alphaI_cluster"                                    
## Methylovirgula                                     "Methylovirgula"                                    
## 28-YEA-48                                          "28-YEA-48"                                         
## D05-2                                              "D05-2"                                             
## Rubellimicrobium                                   "Rubellimicrobium"                                  
## Amaricoccus                                        "Amaricoccus"                                       
## Kaistia                                            "Kaistia"                                           
## Hirschia                                           "Hirschia"                                          
## SWB02                                              "SWB02"                                             
## Roseiarcus                                         "Roseiarcus"                                        
## Amb-16S-1323                                       "Amb-16S-1323"                                      
## Pseudorhodoplanes                                  "Pseudorhodoplanes"                                 
## Pseudolabrys                                       "Pseudolabrys"                                      
## Bradyrhizobium                                     "Bradyrhizobium"                                    
## Rhodoplanes                                        "Rhodoplanes"                                       
## Starkeya                                           "Starkeya"                                          
## Devosia                                            "Devosia"                                           
## Methyloligellaceae                                 "Methyloligellaceae"                                
## Allorhizobium-Neorhizobium-Pararhizobium-Rhizobium "Allorhizobium-Neorhizobium-Pararhizobium-Rhizobium"
## Mesorhizobium                                      "Mesorhizobium"                                     
## Ensifer                                            "Ensifer"                                           
## Ochrobactrum                                       "Ochrobactrum"                                      
## Phyllobacterium                                    "Phyllobacterium"                                   
## Aureimonas                                         "Aureimonas"                                        
## KF-JG30-B3                                         "KF-JG30-B3"                                        
## Telmatospirillum                                   "Telmatospirillum"                                  
## Sphingomonas                                       "Sphingomonas"                                      
## Ellin6055                                          "Ellin6055"                                         
## Novosphingobium                                    "Novosphingobium"                                   
## Altererythrobacter                                 "Altererythrobacter"                                
## Qipengyuania                                       "Qipengyuania"                                      
## Sphingobium                                        "Sphingobium"                                       
## Haematospirillum                                   "Haematospirillum"                                  
## Aliidongia                                         "Aliidongia"                                        
## Reyranella                                         "Reyranella"                                        
## Candidatus_Paracaedibacter                         "Candidatus_Paracaedibacter"                        
## Dongia                                             "Dongia"                                            
## Acidisoma                                          "Acidisoma"                                         
## Craurococcus-Caldovatus                            "Craurococcus-Caldovatus"                           
## Rhodovastum                                        "Rhodovastum"                                       
## Rhodopila                                          "Rhodopila"                                         
## Acidocella                                         "Acidocella"                                        
## Inquilinus                                         "Inquilinus"                                        
## Azospirillum                                       "Azospirillum"                                      
## Skermanella                                        "Skermanella"                                       
## Constrictibacter                                   "Constrictibacter"                                  
## Candidatus_Alysiosphaera                           "Candidatus_Alysiosphaera"                          
## URHD0088                                           "URHD0088"                                          
## Steroidobacter                                     "Steroidobacter"                                    
## R7C24                                              "R7C24"                                             
## JG36-TzT-191                                       "JG36-TzT-191"                                      
## Acidibacter                                        "Acidibacter"                                       
## WD260                                              "WD260"                                             
## KF-JG30-C25                                        "KF-JG30-C25"                                       
## PLTA13                                             "PLTA13"                                            
## CCD24                                              "CCD24"                                             
## Legionella                                         "Legionella"                                        
## Candidatus_Ovatusbacter                            "Candidatus_Ovatusbacter"                           
## JTB255_marine_benthic_group                        "JTB255_marine_benthic_group"                       
## Pantoea                                            "Pantoea"                                           
## Escherichia-Shigella                               "Escherichia-Shigella"                              
## Serratia                                           "Serratia"                                          
## Enterobacter                                       "Enterobacter"                                      
## Klebsiella                                         "Klebsiella"                                        
## KI89A_clade                                        "KI89A_clade"                                       
## Gammaproteobacteria                                "Gammaproteobacteria"                               
## Pseudomonas                                        "Pseudomonas"                                       
## OM60NOR5_clade                                     "OM60(NOR5)_clade"                                  
## Enhydrobacter                                      "Enhydrobacter"                                     
## Acinetobacter                                      "Acinetobacter"                                     
## Alkanindiges                                       "Alkanindiges"                                      
## Cavicella                                          "Cavicella"                                         
## Aquicella                                          "Aquicella"                                         
## Dinghuibacter                                      "Dinghuibacter"                                     
## Chitinophaga                                       "Chitinophaga"                                      
## Flavihumibacter                                    "Flavihumibacter"                                   
## Parafilimonas                                      "Parafilimonas"                                     
## Filimonas                                          "Filimonas"                                         
## Terrimonas                                         "Terrimonas"                                        
## Ferruginibacter                                    "Ferruginibacter"                                   
## UTBCD1                                             "UTBCD1"                                            
## Puia                                               "Puia"                                              
## Flavitalea                                         "Flavitalea"                                        
## Niastella                                          "Niastella"                                         
## Flavisolibacter                                    "Flavisolibacter"                                   
## Edaphobaculum                                      "Edaphobaculum"                                     
## Taibaiella                                         "Taibaiella"                                        
## Sphingobacterium                                   "Sphingobacterium"                                  
## Olivibacter                                        "Olivibacter"                                       
## Mucilaginibacter                                   "Mucilaginibacter"                                  
## Solitalea                                          "Solitalea"                                         
## env.OPS_17                                         "env.OPS_17"                                        
## AKYH767                                            "AKYH767"                                           
## NS11-12_marine_group                               "NS11-12_marine_group"                              
## CWT_CU03-E12                                       "CWT_CU03-E12"                                      
## Bacteroidetes_VC2.1_Bac22                          "Bacteroidetes_VC2.1_Bac22"                         
## Chryseobacterium                                   "Chryseobacterium"                                  
## Cloacibacterium                                    "Cloacibacterium"                                   
## Flavobacterium                                     "Flavobacterium"                                    
## Candidatus_Brownia                                 "Candidatus_Brownia"                                
## Rhodocytophaga                                     "Rhodocytophaga"                                    
## Dyadobacter                                        "Dyadobacter"                                       
## Chryseolinea                                       "Chryseolinea"                                      
## Ohtaekwangia                                       "Ohtaekwangia"                                      
## OLB12                                              "OLB12"                                             
## Adhaeribacter                                      "Adhaeribacter"                                     
## Nevskia                                            "Nevskia"                                           
## Pseudoxanthomonas                                  "Pseudoxanthomonas"                                 
## Stenotrophomonas                                   "Stenotrophomonas"                                  
## Xanthomonas                                        "Xanthomonas"                                       
## Fulvimonas                                         "Fulvimonas"                                        
## Rudaea                                             "Rudaea"                                            
## Dyella                                             "Dyella"                                            
## Luteibacter                                        "Luteibacter"                                       
## Rhodanobacter                                      "Rhodanobacter"                                     
## Arenimonas                                         "Arenimonas"                                        
## Lysobacter                                         "Lysobacter"                                        
## Massilia                                           "Massilia"                                          
## Silvimonas                                         "Silvimonas"                                        
## Piscinibacter                                      "Piscinibacter"                                     
## Rhizobacter                                        "Rhizobacter"                                       
## Variovorax                                         "Variovorax"                                        
## Caenimonas                                         "Caenimonas"                                        
## Curvibacter                                        "Curvibacter"                                       
## Ramlibacter                                        "Ramlibacter"                                       
## Azohydromonas                                      "Azohydromonas"                                     
## Paucibacter                                        "Paucibacter"                                       
## Pelomonas                                          "Pelomonas"                                         
## Roseateles                                         "Roseateles"                                        
## Comamonas                                          "Comamonas"                                         
## Pandoraea                                          "Pandoraea"                                         
## Bordetella                                         "Bordetella"                                        
## Achromobacter                                      "Achromobacter"                                     
## Noviherbaspirillum                                 "Noviherbaspirillum"                                
## Cupriavidus                                        "Cupriavidus"                                       
## Herbaspirillum                                     "Herbaspirillum"                                    
## mle1-7                                             "mle1-7"                                            
## TRA3-20                                            "TRA3-20"                                           
## GOUTA6                                             "GOUTA6"                                            
## MND1                                               "MND1"                                              
## IS-44                                              "IS-44"                                             
## A21b                                               "A21b"                                              
## SC-I-84                                            "SC-I-84"                                           
## Nitrosospira                                       "Nitrosospira"                                      
## B1-7BS                                             "B1-7BS"                                            
## Niveibacterium                                     "Niveibacterium"                                    
## Ellin6067                                          "Ellin6067"                                         
## Burkholderia-Caballeronia-Paraburkholderia         "Burkholderia-Caballeronia-Paraburkholderia"

8.4 Subset location phyloseq objects

# check data
table(meta(ps4)$Location, meta(ps4)$Source)
##                                
##                                 Rhizhomorph Soil associated Stipe
##   El Castillo/Zentla                      8               9     9
##   Monte obscuro Emiliano Zapata           2               7     7
##   Naolinco                               10              10    10
#subsets
ps_ElCastillo <- subset_samples(ps4, Location == "El Castillo/Zentla")

ps_MonteObscuro <- subset_samples(ps4, Location == "Monte obscuro Emiliano Zapata")

ps_Naolinco <- subset_samples(ps4, Location == "Naolinco")

ps_ElCastillo
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 6159 taxa and 26 samples ]
## sample_data() Sample Data:       [ 26 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 6159 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 6159 tips and 6158 internal nodes ]
ps_MonteObscuro
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 6159 taxa and 16 samples ]
## sample_data() Sample Data:       [ 16 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 6159 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 6159 tips and 6158 internal nodes ]
ps_Naolinco
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 6159 taxa and 30 samples ]
## sample_data() Sample Data:       [ 30 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 6159 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 6159 tips and 6158 internal nodes ]
#check data
microbiome::summarize_phyloseq(ps_ElCastillo)
## Compositional = NO2
## 1] Min. number of reads = 322442] Max. number of reads = 676833] Total number of reads = 13117814] Average number of reads = 50453.11538461545] Median number of reads = 516897] Sparsity = 0.82902444202986] Any OTU sum to 1 or less? YES8] Number of singletons = 28859] Percent of OTUs that are singletons 
##         (i.e. exactly one read detected across all samples)010] Number of sample variables are: 12sampleSourceDateYearLocationWeekRaw.readsColect_numberSpecimenquant_readingEffective_readsASVs2
## [[1]]
## [1] "1] Min. number of reads = 32244"
## 
## [[2]]
## [1] "2] Max. number of reads = 67683"
## 
## [[3]]
## [1] "3] Total number of reads = 1311781"
## 
## [[4]]
## [1] "4] Average number of reads = 50453.1153846154"
## 
## [[5]]
## [1] "5] Median number of reads = 51689"
## 
## [[6]]
## [1] "7] Sparsity = 0.8290244420298"
## 
## [[7]]
## [1] "6] Any OTU sum to 1 or less? YES"
## 
## [[8]]
## [1] "8] Number of singletons = 2885"
## 
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n        (i.e. exactly one read detected across all samples)0"
## 
## [[10]]
## [1] "10] Number of sample variables are: 12"
## 
## [[11]]
##  [1] "sample"          "Source"          "Date"            "Year"           
##  [5] "Location"        "Week"            "Raw.reads"       "Colect_number"  
##  [9] "Specimen"        "quant_reading"   "Effective_reads" "ASVs"
microbiome::summarize_phyloseq(ps_MonteObscuro)
## Compositional = NO2
## 1] Min. number of reads = 238902] Max. number of reads = 682953] Total number of reads = 7181394] Average number of reads = 44883.68755] Median number of reads = 414317] Sparsity = 0.7536531904529966] Any OTU sum to 1 or less? YES8] Number of singletons = 24069] Percent of OTUs that are singletons 
##         (i.e. exactly one read detected across all samples)010] Number of sample variables are: 12sampleSourceDateYearLocationWeekRaw.readsColect_numberSpecimenquant_readingEffective_readsASVs2
## [[1]]
## [1] "1] Min. number of reads = 23890"
## 
## [[2]]
## [1] "2] Max. number of reads = 68295"
## 
## [[3]]
## [1] "3] Total number of reads = 718139"
## 
## [[4]]
## [1] "4] Average number of reads = 44883.6875"
## 
## [[5]]
## [1] "5] Median number of reads = 41431"
## 
## [[6]]
## [1] "7] Sparsity = 0.753653190452996"
## 
## [[7]]
## [1] "6] Any OTU sum to 1 or less? YES"
## 
## [[8]]
## [1] "8] Number of singletons = 2406"
## 
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n        (i.e. exactly one read detected across all samples)0"
## 
## [[10]]
## [1] "10] Number of sample variables are: 12"
## 
## [[11]]
##  [1] "sample"          "Source"          "Date"            "Year"           
##  [5] "Location"        "Week"            "Raw.reads"       "Colect_number"  
##  [9] "Specimen"        "quant_reading"   "Effective_reads" "ASVs"
microbiome::summarize_phyloseq(ps_Naolinco)
## Compositional = NO2
## 1] Min. number of reads = 442602] Max. number of reads = 1738693] Total number of reads = 27092974] Average number of reads = 90309.95] Median number of reads = 874747] Sparsity = 0.7894625750933596] Any OTU sum to 1 or less? YES8] Number of singletons = 32619] Percent of OTUs that are singletons 
##         (i.e. exactly one read detected across all samples)010] Number of sample variables are: 12sampleSourceDateYearLocationWeekRaw.readsColect_numberSpecimenquant_readingEffective_readsASVs2
## [[1]]
## [1] "1] Min. number of reads = 44260"
## 
## [[2]]
## [1] "2] Max. number of reads = 173869"
## 
## [[3]]
## [1] "3] Total number of reads = 2709297"
## 
## [[4]]
## [1] "4] Average number of reads = 90309.9"
## 
## [[5]]
## [1] "5] Median number of reads = 87474"
## 
## [[6]]
## [1] "7] Sparsity = 0.789462575093359"
## 
## [[7]]
## [1] "6] Any OTU sum to 1 or less? YES"
## 
## [[8]]
## [1] "8] Number of singletons = 3261"
## 
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n        (i.e. exactly one read detected across all samples)0"
## 
## [[10]]
## [1] "10] Number of sample variables are: 12"
## 
## [[11]]
##  [1] "sample"          "Source"          "Date"            "Year"           
##  [5] "Location"        "Week"            "Raw.reads"       "Colect_number"  
##  [9] "Specimen"        "quant_reading"   "Effective_reads" "ASVs"

Remove singletons

#Delete singletons
ps_ElCastillo <- filter_taxa(ps_ElCastillo, function(x) sum(x) > 1, TRUE)
ps_MonteObscuro <- filter_taxa(ps_MonteObscuro, function(x) sum(x) > 1, TRUE)
ps_Naolinco <- filter_taxa(ps_Naolinco, function(x) sum(x) > 1, TRUE)

#check
ps_ElCastillo
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 3274 taxa and 26 samples ]
## sample_data() Sample Data:       [ 26 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 3274 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 3274 tips and 3273 internal nodes ]
ps_MonteObscuro
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 3753 taxa and 16 samples ]
## sample_data() Sample Data:       [ 16 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 3753 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 3753 tips and 3752 internal nodes ]
ps_Naolinco
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 2898 taxa and 30 samples ]
## sample_data() Sample Data:       [ 30 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 2898 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 2898 tips and 2897 internal nodes ]
# Check 
table(meta(ps_ElCastillo_rel_f)$Source)
## 
##     Rhizhomorph Soil associated           Stipe 
##               8               9               9
table(meta(ps_MonteObscuro_rel_f)$Source)
## 
##     Rhizhomorph Soil associated           Stipe 
##               2               7               7
table(meta(ps_Naolinco_rel_f)$Source)
## 
##     Rhizhomorph Soil associated           Stipe 
##              10              10              10

8.5 Relative abundance phyloseq objects

library(microbiome)
library(microbiomeutilities)

#function
convert_to_compositional_and_index_reformat <- function(ps) {
  ps_rel <- microbiome::transform(ps, "compositional")
  ps_rel_f <- format_to_besthit(ps_rel)
  return(ps_rel_f)
}

# phyloseq objects
phyloseq_objects <- list(
  ps_ElCastillo = ps_ElCastillo,
  ps_MonteObscuro = ps_MonteObscuro,
  ps_Naolinco = ps_Naolinco,
  ps4 = ps4
)

# Apply
phyloseq_objects_rel <- lapply(phyloseq_objects, convert_to_compositional_and_index_reformat)

# Subset
ps_ElCastillo_rel_f <- phyloseq_objects_rel$ps_ElCastillo
ps_MonteObscuro_rel_f <- phyloseq_objects_rel$ps_MonteObscuro
ps_Naolinco_rel_f <- phyloseq_objects_rel$ps_Naolinco
ps_full_rel_f <- phyloseq_objects_rel$ps4

8.6 Full core object

# Get full core with microbiome
full_ps_core_relative <- core(ps_full_rel_f, 0, 0.5)
full_ps_core_counts <- core(ps4, 0, 0.5)
# Collapse the rare taxa into an “Other” category
#full_ps_core_fltOthers <- aggregate_rare(ps4_rel, "Genus", detection = 0, prevalence = 0.5)

# Compare phyloseq objects
ps_full_rel_f
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 6159 taxa and 72 samples ]
## sample_data() Sample Data:       [ 72 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 6159 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 6159 tips and 6158 internal nodes ]
ps4
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 6159 taxa and 72 samples ]
## sample_data() Sample Data:       [ 72 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 6159 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 6159 tips and 6158 internal nodes ]
full_ps_core_relative
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 365 taxa and 72 samples ]
## sample_data() Sample Data:       [ 72 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 365 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 365 tips and 364 internal nodes ]
full_ps_core_counts
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 365 taxa and 72 samples ]
## sample_data() Sample Data:       [ 72 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 365 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 365 tips and 364 internal nodes ]
microbiome::summarize_phyloseq(full_ps_core_counts)
## Compositional = NO2
## 1] Min. number of reads = 32892] Max. number of reads = 1320233] Total number of reads = 22961414] Average number of reads = 31890.84722222225] Median number of reads = 26767.57] Sparsity = 0.3817351598173526] Any OTU sum to 1 or less? NO8] Number of singletons = 09] Percent of OTUs that are singletons 
##         (i.e. exactly one read detected across all samples)010] Number of sample variables are: 12sampleSourceDateYearLocationWeekRaw.readsColect_numberSpecimenquant_readingEffective_readsASVs2
## [[1]]
## [1] "1] Min. number of reads = 3289"
## 
## [[2]]
## [1] "2] Max. number of reads = 132023"
## 
## [[3]]
## [1] "3] Total number of reads = 2296141"
## 
## [[4]]
## [1] "4] Average number of reads = 31890.8472222222"
## 
## [[5]]
## [1] "5] Median number of reads = 26767.5"
## 
## [[6]]
## [1] "7] Sparsity = 0.381735159817352"
## 
## [[7]]
## [1] "6] Any OTU sum to 1 or less? NO"
## 
## [[8]]
## [1] "8] Number of singletons = 0"
## 
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n        (i.e. exactly one read detected across all samples)0"
## 
## [[10]]
## [1] "10] Number of sample variables are: 12"
## 
## [[11]]
##  [1] "sample"          "Source"          "Date"            "Year"           
##  [5] "Location"        "Week"            "Raw.reads"       "Colect_number"  
##  [9] "Specimen"        "quant_reading"   "Effective_reads" "ASVs"

8.7 Create Ampvis Objects

Create full, core and location ampvis objects

library(tibble)
library(ampvis2)
# Ampvis object function
create_ampvis_object <- function(pseq) {
  # create and extract otu table
  otu_table_ampvis <- data.frame(OTU = rownames(phyloseq::otu_table(pseq)@.Data),
                                 phyloseq::otu_table(pseq)@.Data,
                                 phyloseq::tax_table(pseq)@.Data,
                                 check.names = FALSE)

  # Metadata
  meta_data_ampvis <- data.frame(phyloseq::sample_data(pseq),
                                 check.names = FALSE)

  # change index by SampleID
  meta_data_ampvis <- meta_data_ampvis %>% rownames_to_column(var = "SampleID")

  # ampvis object
  av2 <- amp_load(otu_table_ampvis, meta_data_ampvis)
  
  return(av2)
}
# phyloseq objects
phyloseq_objects <- list(ps4 = ps4, ps_ElCastillo = ps_ElCastillo, ps_MonteObscuro = ps_MonteObscuro, ps_Naolinco = ps_Naolinco, full_ps_core_counts = full_ps_core_counts)

# Apply function
ampvis_objects <- lapply(phyloseq_objects, create_ampvis_object)

ampvis_objects
## $ps4
## ampvis2 object with 3 elements. 
## Summary of OTU table:
##      Samples         OTUs  Total#Reads    Min#Reads    Max#Reads Median#Reads 
##           72         6159      4739217        23890       173869      59703.5 
##    Avg#Reads 
##     65822.46 
## 
## Assigned taxonomy:
##      Kingdom       Phylum        Class        Order       Family        Genus 
##   6159(100%)   6159(100%)  6128(99.5%) 6058(98.36%) 5483(89.02%) 4442(72.12%) 
##      Species 
## 1182(19.19%) 
## 
## Metadata variables: 13 
##  SampleID, sample, Source, Date, Year, Location, Week, Raw.reads, Colect_number, Specimen, quant_reading, Effective_reads, ASVs
## 
## $ps_ElCastillo
## ampvis2 object with 3 elements. 
## Summary of OTU table:
##      Samples         OTUs  Total#Reads    Min#Reads    Max#Reads Median#Reads 
##           26         3274      1311781        32244        67683        51689 
##    Avg#Reads 
##     50453.12 
## 
## Assigned taxonomy:
##      Kingdom       Phylum        Class        Order       Family        Genus 
##   3274(100%)   3274(100%) 3256(99.45%) 3216(98.23%) 2907(88.79%) 2311(70.59%) 
##      Species 
##  705(21.53%) 
## 
## Metadata variables: 13 
##  SampleID, sample, Source, Date, Year, Location, Week, Raw.reads, Colect_number, Specimen, quant_reading, Effective_reads, ASVs
## 
## $ps_MonteObscuro
## ampvis2 object with 3 elements. 
## Summary of OTU table:
##      Samples         OTUs  Total#Reads    Min#Reads    Max#Reads Median#Reads 
##           16         3753       718139        23890        68295        41431 
##    Avg#Reads 
##     44883.69 
## 
## Assigned taxonomy:
##      Kingdom       Phylum        Class        Order       Family        Genus 
##   3753(100%)   3753(100%) 3734(99.49%) 3692(98.37%) 3393(90.41%) 2815(75.01%) 
##      Species 
##  615(16.39%) 
## 
## Metadata variables: 13 
##  SampleID, sample, Source, Date, Year, Location, Week, Raw.reads, Colect_number, Specimen, quant_reading, Effective_reads, ASVs
## 
## $ps_Naolinco
## ampvis2 object with 3 elements. 
## Summary of OTU table:
##      Samples         OTUs  Total#Reads    Min#Reads    Max#Reads Median#Reads 
##           30         2898      2709297        44260       173869        87474 
##    Avg#Reads 
##      90309.9 
## 
## Assigned taxonomy:
##      Kingdom       Phylum        Class        Order       Family        Genus 
##   2898(100%)   2898(100%) 2887(99.62%) 2848(98.27%) 2570(88.68%) 2093(72.22%) 
##      Species 
##  570(19.67%) 
## 
## Metadata variables: 13 
##  SampleID, sample, Source, Date, Year, Location, Week, Raw.reads, Colect_number, Specimen, quant_reading, Effective_reads, ASVs
## 
## $full_ps_core_counts
## ampvis2 object with 3 elements. 
## Summary of OTU table:
##      Samples         OTUs  Total#Reads    Min#Reads    Max#Reads Median#Reads 
##           72          365      2296141         3289       132023      26767.5 
##    Avg#Reads 
##     31890.85 
## 
## Assigned taxonomy:
##     Kingdom      Phylum       Class       Order      Family       Genus 
##   365(100%)   365(100%) 363(99.45%) 358(98.08%) 331(90.68%) 262(71.78%) 
##     Species 
##  80(21.92%) 
## 
## Metadata variables: 13 
##  SampleID, sample, Source, Date, Year, Location, Week, Raw.reads, Colect_number, Specimen, quant_reading, Effective_reads, ASVs
#subset amp obj

phyloseq_objects <- list(ps4 = ps4, ps_ElCastillo = ps_ElCastillo, ps_MonteObscuro = ps_MonteObscuro, ps_Naolinco = ps_Naolinco, full_ps_core_counts = full_ps_core_counts)

av2_full <- ampvis_objects$ps4
av2_core <- ampvis_objects$full_ps_core_counts
av2_ElCastillo <- ampvis_objects$ps_ElCastillo
av2_MonteObscuro <-ampvis_objects$ps_MonteObscuro
av2_Naolinco <- ampvis_objects$ps_Naolinco

9 Core Microbiome

9.1 Ampvis2

9.1.1 Full Core Compartment

# Full

core_source_av2_full <- amp_venn(av2_full, group_by = "Source", cut_a = 0, cut_f = 50, text_size = 6, detailed_output=TRUE)
core_otu_table_av2_full <- core_source_av2_full$Otutable

#save core compartments otu table
write.table(core_otu_table_av2_full, "results/03.Diversity/Core_compartments_otu_table_ampvis_full.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)

9.1.2 Locations Core Compartment

# El Castillo
core_source_av2_ElCastillo <- amp_venn(av2_ElCastillo, group_by = "Source", cut_a = 0, cut_f = 50, text_size = 6, detailed_output=TRUE)
core_otu_table_av2_ElCastillo <- core_source_av2_ElCastillo$Otutable
write.table(core_otu_table_av2_ElCastillo, "results/03.Diversity/Core_compartments_otu_table_ampvis_ElCastillo.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)

# Monte Obscuro
core_source_av2_MonteObscuro <- amp_venn(av2_MonteObscuro, group_by = "Source", cut_a = 0, cut_f = 50, text_size = 6, detailed_output=TRUE)
core_otu_table_av2_MonteObscuro <- core_source_av2_MonteObscuro$Otutable
write.table(core_otu_table_av2_MonteObscuro, "results/03.Diversity/Core_compartments_otu_table_ampvis_MonteObscuro.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)

# Naolinco
core_source_av2_Naolinco <- amp_venn(av2_Naolinco, group_by = "Source", cut_a = 0, cut_f = 50, text_size = 6, detailed_output=TRUE)
core_otu_table_av2_Naolinco <- core_source_av2_Naolinco$Otutable
write.table(core_otu_table_av2_Naolinco, "results/03.Diversity/Core_compartments_otu_table_ampvis_Naolinco.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)

9.2 Microbiome

Based here

Calculate core between Compartments

# count number of samples in each group
table(meta(ps4)$Source, useNA = "always")
## 
##     Rhizhomorph Soil associated           Stipe            <NA> 
##              20              26              26               0

9.2.1 Full Core Compartment

#List of compartments
compartments <- unique(as.character(meta(ps_full_rel_f)$Source))
print(compartments)
## [1] "Stipe"           "Soil associated" "Rhizhomorph"
#Write a for loop to go through each of the compartments one by one and combine identified core taxa into a list.

list_core_full <- list()# an empty object to store information

for (compartment in compartments){ 
    #print(paste0("Identifying Core Taxa for ", compartment))
    
    ps_sub_full <- subset_samples(ps_full_rel_f, Source == compartment) 
    
    core_m_full <- core_members(ps_sub_full,
                           detection = 0, # 100 % samples 
                           prevalence = 0.5) # 50% prevalence
    print(paste0("No. of core taxa in ", compartment, " : ", length(core_m_full)))
   
     # add to a list core taxa for each group.
    list_core_full[[compartment]] <- core_m_full
    
    #create core compartment variable
    variable_name_full <- paste0("core_full_", gsub(" ", "_", compartment))
    assign(variable_name_full, core_m_full, envir = .GlobalEnv)
    
    #write all cores
    file_name_full <- paste0("results/03.Diversity/",variable_name,".csv")
    write.csv(core_m_full, file = file_name_full, row.names = FALSE)

}
## [1] "No. of core taxa in Stipe : 135"
## [1] "No. of core taxa in Soil associated : 902"
## [1] "No. of core taxa in Rhizhomorph : 536"

Plot venn diagram

library(eulerr)

# factor to reorder plot
desired_order <- c("Soil associated", "Rhizhomorph", "Stipe")

# Reorder
list_core_ordered <- list_core_full[desired_order]


# Circular venn
venn_colors <- c(`Soil associated`="#0073C2FF", Rhizhomorph="#EFC000FF", Stipe="#868686FF")
plot_venn_venn <- plot(venn(list_core_ordered), fills = venn_colors, alpha = 0.9)

plot_venn_venn

# Real size venn
euler_venn <- euler(list_core_ordered, shape = "ellipse")

plot_euler_venn <- plot(euler_venn, quantities = TRUE, fills = list(fill = c("#0073C2FF", "#EFC000FF", "#868686FF"), alpha = 0.9), legend = list(labels = c("Soil associated", "Rhizhomorph", "Stipe")))

plot_euler_venn

Get core identities

9.2.2 Core compartments by Location

Define colors and order

# factor to reorder plot
desired_order <- c("Soil associated", "Rhizhomorph", "Stipe")
venn_colors <- c(`Soil associated`="#0073C2FF", Rhizhomorph="#EFC000FF", Stipe="#868686FF")

El Castillo

# El Castillo
compartmentsEC <- unique(as.character(meta(ps_ElCastillo_rel_f)$Source))

# Lista para guardar los core taxa de cada compartimento
list_core_taxaEC <- list()

for (compartment in compartmentsEC) {
  ps_sub <- subset_samples(ps_ElCastillo_rel_f, Source == compartment)
  core_m <- core_members(ps_sub, detection = 0, prevalence = 0.5)
  
  # Guardar los resultados en una lista
  list_core_taxaEC[[compartment]] <- core_m
  cat("Compartimiento:", compartment, "\n")
  cat("Número de core taxa:", length(core_m), "\n")
  cat("Algunos core taxa:", head(core_m), "\n") 
}
## Compartimiento: Stipe 
## Número de core taxa: 310 
## Algunos core taxa: ASV_1894:f__Pirellulaceae ASV_1226:uncultured_Planctomyces ASV_1830:uncultured_Planctomyces ASV_763:uncultured_Planctomyces ASV_3514:uncultured_Planctomyces ASV_2226:uncultured_Planctomycetaceae 
## Compartimiento: Rhizhomorph 
## Número de core taxa: 582 
## Algunos core taxa: ASV_3775:g__WD2101_soil_group ASV_2220:g__WD2101_soil_group ASV_1924:uncultured_Pasteuria ASV_765:uncultured_Pasteuria ASV_2059:f__Pirellulaceae ASV_1226:uncultured_Planctomyces 
## Compartimiento: Soil associated 
## Número de core taxa: 1239 
## Algunos core taxa: ASV_5678:g__WD2101_soil_group ASV_3775:g__WD2101_soil_group ASV_3038:g__WD2101_soil_group ASV_5668:Planctomycetales_bacterium ASV_2220:g__WD2101_soil_group ASV_2821:g__WD2101_soil_group
#Plot venn
library(eulerr)

# Reorder
list_core_ordered_EC <- list_core_taxaEC[desired_order]
# Circular venn
plot_venn_venn_EC <- plot(venn(list_core_ordered_EC), fills = venn_colors, alpha = 0.9)

# Real size venn
euler_venn_EC <- euler(list_core_ordered_EC, shape = "ellipse")
plot_euler_venn_EC <- plot(euler_venn_EC, quantities = TRUE, fills = list(fill = c("#0073C2FF", "#EFC000FF", "#868686FF"), alpha = 0.9), legend = list(labels = c("Soil associated", "Rhizhomorph", "Stipe")))

plot_venn_venn_EC

plot_euler_venn_EC

Monte Obscuro

# El Castillo
compartmentsMO <- unique(as.character(meta(ps_MonteObscuro_rel_f)$Source))

# Lista para guardar los core taxa de cada compartimento
list_core_taxaMO <- list()

for (compartment in compartmentsMO) {
  ps_sub <- subset_samples(ps_MonteObscuro_rel_f, Source == compartment)
  core_m <- core_members(ps_sub, detection = 0, prevalence = 0.5)
  
  # Guardar los resultados en una lista
  list_core_taxaMO[[compartment]] <- core_m
  cat("Compartment:", compartment, "\n")
  cat("Number of core taxa:", length(core_m), "\n")
  cat("Some core taxa:", head(core_m), "\n") 
}
## Compartment: Stipe 
## Number of core taxa: 374 
## Some core taxa: ASV_1035:uncultured_Planctomycetaceae ASV_1218:g__Pir4_lineage ASV_800:uncultured_Planctomycetaceae ASV_498:g__Pir4_lineage ASV_4143:uncultured_soil ASV_2265:f__Pirellulaceae 
## Compartment: Soil associated 
## Number of core taxa: 2170 
## Some core taxa: ASV_1034:uncultured_planctomycete ASV_1428:g__WD2101_soil_group ASV_4955:g__WD2101_soil_group ASV_2133:g__WD2101_soil_group ASV_4953:uncultured_planctomycete ASV_1797:g__WD2101_soil_group 
## Compartment: Rhizhomorph 
## Number of core taxa: 1285 
## Some core taxa: ASV_2133:g__WD2101_soil_group ASV_4953:uncultured_planctomycete ASV_1797:g__WD2101_soil_group ASV_1945:g__WD2101_soil_group ASV_2459:g__WD2101_soil_group ASV_779:g__WD2101_soil_group
#Plot venn
library(eulerr)

# Reorder
list_core_ordered_MO <- list_core_taxaMO[desired_order]
# Circular venn
plot_venn_venn_MO <- plot(venn(list_core_ordered_MO), fills = venn_colors, alpha = 0.9)

# Real size venn
euler_venn_MO <- euler(list_core_ordered_MO, shape = "ellipse")
plot_euler_venn_MO <- plot(euler_venn_MO, quantities = TRUE, fills = list(fill = c("#0073C2FF", "#EFC000FF", "#868686FF"), alpha = 0.9), legend = list(labels = c("Soil associated", "Rhizhomorph", "Stipe")))

plot_venn_venn_MO

plot_euler_venn_MO

Naolinco

# El Castillo
compartmentsN <- unique(as.character(meta(ps_Naolinco_rel_f)$Source))

# Lista para guardar los core taxa de cada compartimento
list_core_taxaN <- list()

for (compartment in compartmentsN) {
  ps_sub <- subset_samples(ps_Naolinco_rel_f, Source == compartment)
  core_m <- core_members(ps_sub, detection = 0, prevalence = 0.5)
  
  # Guardar los resultados en una lista
  list_core_taxaN[[compartment]] <- core_m
  cat("Compartment:", compartment, "\n")
  cat("Number of core taxa:", length(core_m), "\n")
  cat("Some core taxa:", head(core_m), "\n") 
}
## Compartment: Stipe 
## Number of core taxa: 378 
## Some core taxa: ASV_779:g__WD2101_soil_group ASV_800:uncultured_Planctomycetaceae ASV_765:uncultured_Pasteuria ASV_1894:f__Pirellulaceae ASV_1044:g__Gemmata ASV_939:uncultured_Prosthecobacter 
## Compartment: Rhizhomorph 
## Number of core taxa: 990 
## Some core taxa: ASV_1797:g__WD2101_soil_group ASV_1823:uncultured_planctomycete ASV_779:g__WD2101_soil_group ASV_1560:g__Pir4_lineage ASV_800:uncultured_Planctomycetaceae ASV_498:g__Pir4_lineage 
## Compartment: Soil associated 
## Number of core taxa: 1735 
## Some core taxa: ASV_1034:uncultured_planctomycete ASV_2133:g__WD2101_soil_group ASV_1797:g__WD2101_soil_group ASV_1823:uncultured_planctomycete ASV_1945:g__WD2101_soil_group ASV_2459:g__WD2101_soil_group
#Plot venn
library(eulerr)

# Reorder
list_core_ordered_N <- list_core_taxaN[desired_order]
# Circular venn
plot_venn_venn_N <- plot(venn(list_core_ordered_N), fills = venn_colors, alpha = 0.9)

# Real size venn
euler_venn_N <- euler(list_core_ordered_N, shape = "ellipse")
plot_euler_venn_N <- plot(euler_venn_N, quantities = TRUE, fills = list(fill = c("#0073C2FF", "#EFC000FF", "#868686FF"), alpha = 0.9), legend = list(labels = c("Soil associated", "Rhizhomorph", "Stipe")))

plot_venn_venn_N

plot_euler_venn_N

Combine plots

library(cowplot)


#Combine venn plot
title_vennplot <- ggdraw() + draw_label("Core Microbiome", fontface = 'bold', x = 0.5, hjust = 0.5, size = 15)
plot_venn_venn_fmt <- plot_grid(plot_venn_venn, labels = c("A"), ncol = 1)
venn_loc_plots <- plot_grid(plot_venn_venn_EC, plot_venn_venn_MO, plot_venn_venn_N, labels = c("B", "C", "D"), ncol = 3)
venn_all_plots <- plot_grid(title_vennplot, plot_venn_venn_fmt, venn_loc_plots, ncol = 1, rel_heights = c(0.2, 2, 1.7))

venn_all_plots

library(cowplot)
#Combine euler plot
title_vennplot <- ggdraw() + draw_label("Core Microbiome", fontface = 'bold', x = 0.5, hjust = 0.5, size = 15)
plot_euler_venn_fmt <- plot_grid(plot_euler_venn, labels = c("A"), ncol = 1)
euler_venn_loc_plots <- plot_grid(plot_euler_venn_EC, plot_euler_venn_MO, plot_euler_venn_N, labels = c("B", "C", "D"), ncol = 3)
euler_venn_all_plots <- plot_grid(title_vennplot, plot_euler_venn_fmt, euler_venn_loc_plots, ncol = 1, rel_heights = c(0.2, 2, 1.7))

euler_venn_all_plots

10 Abundances

10.1 Relative abundances

10.1.1 Full

All

#Factor Source 
av2_full$metadata$Compartment <- factor(av2_full$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))

# Full all samples
ampv_heatmap_abundances_genus_full <- amp_heatmap(av2_full,
            group_by = "Compartment",
            facet_by = "Location",
            plot_values = TRUE,
            tax_show = 20,
            tax_aggregate = "Genus",
            #tax_add = "Family",
            plot_colorscale = "log10",
            plot_legendbreaks = c(1, 5, 5),
            color_vector = c("white","deepskyblue3","magenta3"))

Locations

#Factor Source 
av2_ElCastillo$metadata$Compartment <- factor(av2_ElCastillo$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# El castillo
ampv_heatmap_abundances_genus_EC <- amp_heatmap(av2_ElCastillo,
            group_by = "Compartment",
            plot_values = TRUE,
            tax_show = 20,
            tax_aggregate = "Genus",
            #tax_add = "Family",
            plot_colorscale = "log10",
            plot_legendbreaks = c(1, 5, 5),
            color_vector = c("white","deepskyblue3","magenta3"))

#Factor Source 
av2_MonteObscuro$metadata$Compartment <- factor(av2_MonteObscuro$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# Monte Obscuro
ampv_heatmap_abundances_genus_MO <- amp_heatmap(av2_MonteObscuro,
            group_by = "Compartment",
            plot_values = TRUE,
            tax_show = 20,
            tax_aggregate = "Genus",
            #tax_add = "Family",
            plot_colorscale = "log10",
            plot_legendbreaks = c(1, 5, 5),
            color_vector = c("white","deepskyblue3","magenta3"))

#Factor Source 
av2_Naolinco$metadata$Compartment <- factor(av2_Naolinco$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# Naolinco
ampv_heatmap_abundances_genus_N <- amp_heatmap(av2_Naolinco,
            group_by = "Compartment",
            plot_values = TRUE,
            tax_show = 20,
            tax_aggregate = "Genus",
            #tax_add = "Family",
            plot_colorscale = "log10",
            plot_legendbreaks = c(1, 5, 5),
            color_vector = c("white","deepskyblue3","magenta3"))

Combine plots

library(cowplot)
#Combine venn plot
title_abund_plot <- ggdraw() + draw_label("Relative Abundance", fontface = 'bold', x = 0.5, hjust = 0.5, size = 15)
ampv_abund_all_g_fmt <- plot_grid(ampv_heatmap_abundances_genus_full, labels = c("A"), ncol = 1)
## Warning in scale_fill_gradientn(colours = color.pal, trans = plot_colorscale, :
## log-10 transformation introduced infinite values.
ampv_abund_loc_g <- plot_grid(ampv_heatmap_abundances_genus_EC, ampv_heatmap_abundances_genus_MO, ampv_heatmap_abundances_genus_N, labels = c("B", "C", "D"), ncol = 3)
## Warning in scale_fill_gradientn(colours = color.pal, trans = plot_colorscale, :
## log-10 transformation introduced infinite values.
ampv_all_g_plots <- plot_grid(title_abund_plot, ampv_abund_all_g_fmt, ampv_abund_loc_g, ncol = 1, rel_heights = c(0.2, 1, 1))

ampv_all_g_plots

### Core

#Factor Source 
av2_core$metadata$Compartment <- factor(av2_core$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))

# Full all samples
ampv_heatmap_abundances_genus_core <- amp_heatmap(av2_core,
            group_by = "Compartment",
            facet_by = "Location",
            plot_values = TRUE,
            tax_show = 20,
            tax_aggregate = "Genus",
            #tax_add = "Family",
            plot_colorscale = "log10",
            plot_legendbreaks = c(1, 5, 5),
            color_vector = c("white","deepskyblue3","magenta3"))

ampv_heatmap_abundances_genus_core
## Warning in scale_fill_gradientn(colours = color.pal, trans = plot_colorscale, :
## log-10 transformation introduced infinite values.

10.2 Rank abundance

10.2.1 Full

All

library(ampvis2)
library(ggplot2)

#Factor Source 
av2_full$metadata$Compartment <- factor(av2_full$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))

# rank abundance
rank_ab_full <- amp_rankabundance(av2_full, log10_x = TRUE, group_by = "Compartment") +
  scale_color_jco() 

#ggsave("results/plots/02.diversity/02.rank_abundance.png", rank_ab , width = 5, height = 4)

Location

# El Castillo
#Factor Source 
av2_ElCastillo$metadata$Compartment <- factor(av2_ElCastillo$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# rank abundance
rank_ab_EC <- amp_rankabundance(av2_ElCastillo, log10_x = TRUE, group_by = "Compartment") +
  scale_color_jco() 

# Monte Obscuro
#Factor Source 
av2_MonteObscuro$metadata$Compartment <- factor(av2_MonteObscuro$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# rank abundance
rank_ab_MO <- amp_rankabundance(av2_MonteObscuro, log10_x = TRUE, group_by = "Compartment") +
  scale_color_jco() 

# Naolinco
#Factor Source 
av2_Naolinco$metadata$Compartment <- factor(av2_Naolinco$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# rank abundance
rank_ab_N <- amp_rankabundance(av2_Naolinco, log10_x = TRUE, group_by = "Compartment") +
  scale_color_jco()

Combine

library(cowplot)
#Combine euler plot
title_rank_plot <- ggdraw() + draw_label("Relative diversity and species evenness", fontface = 'bold', x = 0.5, hjust = 0.5, size = 15)

rank_ab_full_EC <- plot_grid(rank_ab_full, rank_ab_EC, labels = c("A", "B"), ncol = 2)
## Warning: Removed 1887 rows containing missing values or values outside the scale range
## (`geom_line()`).
rank_ab_MO_N_plots <- plot_grid(rank_ab_MO, rank_ab_N, labels = c("C", "D"), ncol = 2)
rankabund_all_plots <- plot_grid(title_rank_plot, rank_ab_full_EC, rank_ab_MO_N_plots, ncol = 1,  rel_heights = c(0.16, 1, 1))

rankabund_all_plots

10.2.2 Core

library(ampvis2)
library(ggplot2)

#Factor Source 
av2_core$metadata$Compartment <- factor(av2_core$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))

# rank abundance
rank_ab_full_core <- amp_rankabundance(av2_core, log10_x = TRUE, group_by = "Compartment") +
  scale_color_jco()

rank_ab_full_core

11 Alpha diversity

11.1 Full

11.1.1 Get Hill numbers

library(hilldiv)
## Registered S3 methods overwritten by 'FSA':
##   method       from
##   confint.boot car 
##   hist.boot    car
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ lubridate 1.9.3     ✔ stringr   1.5.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ microbiome::alpha() masks ggplot2::alpha()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ lubridate::stamp()  masks cowplot::stamp()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
# Get hill numbers
q0 <- hill_div(otu_table_fltr, qvalue = 0) 
q1 <- hill_div(otu_table_fltr, qvalue = 1) 
q2 <- hill_div(otu_table_fltr, qvalue = 2)

# Merge metadata with Hill numbers
q012_all <- cbind(q0, q1, q2) %>% as.data.frame() %>% rownames_to_column(var = "sample-id")
metadata_with_hill_fltr <- q012_all %>%                                      
  inner_join(metadata_fltr, by = c("sample-id"="sample-id"))

#save table
write.table(metadata_with_hill_fltr, "results/03.Diversity/Metadata_with_hill.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)

# show
metadata_with_hill_fltr %>% head() %>% kable()
sample-id q0 q1 q2 sample Source Date Year Location Week Raw reads Colect_number Specimen quant_reading Effective_reads ASVs
DR1014-E 135 9.036873 3.171788 DR1014-E Stipe sep-30 2021 Monte obscuro Emiliano Zapata 1 159896 DRamos 1014 Ind_1 948 68295 135
DR1014-SAH 2118 1145.961220 576.966109 DR1014-SAH Soil associated sep-30 2021 Monte obscuro Emiliano Zapata 1 223804 DRamos 1014 Ind_1 3071 40974 2118
DR1018-E 768 47.874948 5.350837 DR1018-E Stipe oct-07 2021 Monte obscuro Emiliano Zapata 2 68578 DRamos 1018 Ind_3 97 23890 768
DR1018-SAH 1824 887.566616 436.200764 DR1018-SAH Soil associated oct-07 2021 Monte obscuro Emiliano Zapata 2 211120 DRamos 1018 Ind_3 4017 38161 1824
DR1019-E 1012 447.167799 145.072574 DR1019-E Stipe oct-07 2021 Monte obscuro Emiliano Zapata 2 144270 DRamos 1019 Ind_4 215 33853 1012
DR1019-SAH 2202 1317.872289 723.411536 DR1019-SAH Soil associated oct-07 2021 Monte obscuro Emiliano Zapata 2 185380 DRamos 1019 Ind_4 3754 34576 2202

11.1.2 Explore Hill distribution

Get means

# Reorder q values  
meta_qs <- metadata_with_hill_fltr %>%
    pivot_longer(cols = q0:q2, names_to = "q", values_to = "value") %>%
    filter(q %in% c("q0", "q1", "q2")) %>%
    mutate(
        qs = case_when(
            q == "q0" ~ "q=0",
            q == "q1" ~ "q=1",
            q == "q2" ~ "q=2"
        ))

#Get means of Hill numbers
means <- meta_qs %>% group_by(Source, Location, qs) %>%
  summarise(means = mean(value, na.rm = TRUE),
            sd = sd(value, na.rm = TRUE),
            .groups = 'drop')

#save table
write.table(means, "results/03.Diversity/Hill_means_sd.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)

print(means)
## # A tibble: 27 × 5
##    Source          Location                      qs    means    sd
##    <fct>           <fct>                         <chr> <dbl> <dbl>
##  1 Soil associated El Castillo/Zentla            q=0   1378. 201. 
##  2 Soil associated El Castillo/Zentla            q=1    531.  88.4
##  3 Soil associated El Castillo/Zentla            q=2    183.  47.6
##  4 Soil associated Monte obscuro Emiliano Zapata q=0   2100. 154. 
##  5 Soil associated Monte obscuro Emiliano Zapata q=1   1139. 142. 
##  6 Soil associated Monte obscuro Emiliano Zapata q=2    591. 114. 
##  7 Soil associated Naolinco                      q=0   1733. 131. 
##  8 Soil associated Naolinco                      q=1    524. 165. 
##  9 Soil associated Naolinco                      q=2    146. 101. 
## 10 Rhizhomorph     El Castillo/Zentla            q=0   1103. 256. 
## # ℹ 17 more rows

11.1.3 Plot Hill distribution

library(ggpubr) 
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:cowplot':
## 
##     get_legend
## The following object is masked from 'package:qiime2R':
## 
##     mean_sd
# factor to reorder plot
metadata_with_hill_fltr$Location <- factor(metadata_with_hill_fltr$Location)
metadata_with_hill_fltr$Compartment  <- factor(metadata_with_hill_fltr$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))


#plot

hill_barplot_explore_fltr <- metadata_with_hill_fltr %>%
  pivot_longer(cols = q0:q2, names_to = "q", values_to = "value") %>%
  filter(q %in% c("q0", "q1", "q2")) %>%
  mutate(
    qs = case_when(
      q == "q0" ~ "q=0",
      q == "q1" ~ "q=1",
      q == "q2" ~ "q=2"
    )) %>%
  ggbarplot(
    x = "Compartment",
    y = "value",
    add = "mean_se",
    facet.by = c("qs", "Location"),
    fill = "Compartment"
  ) +
  scale_fill_jco() +
  geom_jitter(size = 0.8, position = position_jitter(width = 0.1)) +
  facet_grid(rows = vars(qs), cols = vars(Location), scales = "free_y") +
  theme(
    strip.background =  element_blank(), #element_rect(fill = "")
    strip.text.x = element_text(face= "bold", size = 13, margin = margin(0.5, 0, 0.5, 0)),
    strip.text.y = element_text(face= "bold", size = 14, angle = 0, margin = margin(0, 0.5, 0, 0.5)),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.line = element_line(colour = "black"),
    #panel.border = element_blank(),
    panel.spacing.x = unit(0.5, "lines"),
    panel.background = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_text(colour = "black", size = 11),
    legend.position = "bottom"
  ) +
  labs(
    fill = "Compartment",
    y = "",
    x = "", #,
    title = "Biodiversity across Locations and Compartments"
  ) + #+
  #theme(plot.title = element_text(hjust = 0.5))
   theme(legend.text = element_text(size = 12)) #+
  #stat_summary(fun = mean, geom = "line", aes(group = Compartment), color = "red")

hill_barplot_explore_fltr

ggsave("results/plots/02.diversity/02.Hill_diversity_across_Locations_and_Compartments_fltr.png", hill_barplot_explore_fltr, width = 11, height = 11)

11.1.4 Depth vs Hill Correlation

Alpha diversity depth correlation to order q=0

#install.packages("ggpubr")
library(ggpubr)

q0_vs_depth_fltr <- ggscatter(metadata_with_hill_fltr, 
                         x = "Effective_reads", 
                         y = "q0", 
                         xlab= "Sequencing depth (number of reads)",
                         ylab = "q=0",
                         #ylab="Alpha diversity q=0 (effective number of total ASVs)",
   add = "reg.line",  # Add regression line
   add.params = list(color = "#114e9d", fill = "lightgray"), # Customize reg. line
   conf.int = TRUE, # Add confidence interval
   cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
   cor.coeff.args = list(method = "pearson", label.x = 3, label.sep = "\n")
   )+
  theme(legend.title = element_blank(), legend.position = "none")

Alpha diversity depth correlation to order q=1

q1_vs_depth_fltr <- ggscatter(metadata_with_hill_fltr, 
                         x = "Effective_reads", 
                         y = "q1", 
                         xlab= "Sequencing depth (number of reads)",
                         ylab = "q=1",
                         #ylab="Alpha diversity q=1 (effective number of total ASVs)",
   add = "reg.line",  # Add regression line
   add.params = list(color = "#114e9d", fill = "lightgray"), # Customize reg. line
   conf.int = TRUE, # Add confidence interval
   cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
   cor.coeff.args = list(method = "pearson", label.x = 3, label.sep = "\n")
   )+
  theme(legend.title = element_blank(), legend.position = "none")

Alpha diversity depth correlation to order q=2

q2_vs_depth_fltr <- ggscatter(metadata_with_hill_fltr, 
                         x = "Effective_reads", 
                         y = "q2", 
                         xlab= "Sequencing depth (number of reads)",
                         ylab = "q=2",
                         #ylab="Alpha diversity q=2 (effective number of total ASVs)",
   add = "reg.line",  # Add regression line
   add.params = list(color = "#114e9d", fill = "lightgray"), # Customize reg. line
   conf.int = TRUE, # Add confidence interval
   cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
   cor.coeff.args = list(method = "pearson", label.x = 3, label.sep = "\n")
   )+
  theme(legend.title = element_blank(), legend.position = "none")
library(ggplot2)
library(cowplot)

#save plots

#Combine plot
title_corr_plot <- ggdraw() + draw_label("Alpha diversity depth correlation with fltr samples")
correlation_plot_q012_fltr <- plot_grid(title_corr_plot, q0_vs_depth_fltr,q1_vs_depth_fltr,q2_vs_depth_fltr, labels = c(" ","A", "B", "C"), ncol = 1, rel_heights = c(0.15, 1, 1, 1))
correlation_plot_q012_fltr

#save plot
ggsave("results/plots/02.diversity/03.alpha_depth_correlation_fltr_samples.png", correlation_plot_q012_fltr, width = 8, height = 8)

11.1.5 Check Hill numbers Normality

Shapiro test to Hill numbers

library(ggplot2)
library(cowplot)

#Shapiro test
shapiro_q0_fltr <- shapiro.test(metadata_with_hill_fltr$q0)
shapiro_q0_fltr_pvalue <- round(shapiro_q0_fltr$p.value, 5)
shapiro_q1_fltr <- shapiro.test(metadata_with_hill_fltr$q1)
shapiro_q1_fltr_pvalue <- round(shapiro_q1_fltr$p.value, 5)
shapiro_q2_fltr <- shapiro.test(metadata_with_hill_fltr$q2)
shapiro_q2_fltr_pvalue <- round(shapiro_q2_fltr$p.value, 9)

#Histograms
histplot_q0_fltr <- ggplot(metadata_with_hill_fltr, aes(x = q0, xlab="q=0")) +
  geom_histogram(fill = "lightblue", bins = 15) +
  ggtitle(paste("Shapiro, p-value:", shapiro_q0_fltr_pvalue)) +
  theme_bw() + xlab("q=0") + ylab("Frequency")

histplot_q1_fltr <- ggplot(metadata_with_hill_fltr, aes(x = q1)) +
  geom_histogram(fill = "lightblue", bins = 15) +
  ggtitle(paste("Shapiro, p-value:", shapiro_q1_fltr_pvalue)) +
  theme_bw() + xlab("q=1") + ylab("Frequency")

histplot_q2_fltr <- ggplot(metadata_with_hill_fltr, aes(x = q2)) +
  geom_histogram(fill = "lightblue", bins = 15) +
  ggtitle(paste("Shapiro, p-value:", shapiro_q2_fltr_pvalue)) +
  theme_bw() + xlab("q=2") + ylab("Frequency")

#Combine plot
title_plot <- ggdraw() + draw_label("Histogram of Hill diversity") #, fontface = 'bold', x = 0.5, hjust = 0.5)
histplot_q012_fltr <- plot_grid(title_plot, histplot_q0_fltr, histplot_q1_fltr, histplot_q2_fltr, labels = c(" ","A", "B", "C"), ncol = 1, rel_heights = c(0.15, 1, 1, 1))
histplot_q012_fltr

#save plot
ggsave("results/plots/02.diversity/04.hill_normality_test_histogram_fltr_samples.png", histplot_q012_fltr)
## Saving 7 x 5 in image

A pesar de que q0 tiene una distribución normal, dado que los datos del proyecto tienen medidas repetidas y desbalanceo, lo adecuado es hacer comparaciones basadas en modelos. En este caso modelos lineales mixtos

11.1.6 Models

11.1.6.1 Linear Mixed Models

Estos modelos son útiles cuando las observaciones tienen estructuras de dependencia, como datos longitudinales o datos agrupados/anidados. Los modelos lineales mixtos incluyen tanto efectos fijos como efectos aleatorios, donde los efectos fijos son comunes a todas las unidades/individuos, y los efectos aleatorios varían por grupo o unidad.

lme

library(nlme)
# lme

# q0
q0_lme_fltr <- lme(q0 ~ log(Effective_reads)+Source*Location, random=~1 |Specimen, data = metadata_with_hill_fltr)
q0_lme_perm_fltr <- PermTest(q0_lme_fltr, B = 100)

# q1
q1_lme_fltr <- lme(q1 ~ log(Effective_reads)+Source*Location, random=~1 |Specimen, data = metadata_with_hill_fltr)
q1_lme_perm_fltr <- PermTest(q1_lme_fltr, B = 100)

# q2
q2_lme_fltr <- lme(q2 ~ log(Effective_reads)+Source*Location, random=~1 |Specimen, data = metadata_with_hill_fltr)
q2_lme_perm_fltr <- PermTest(q2_lme_fltr, B = 100)

11.1.6.2 Generalized Linear Models

glm

#q0 glm
q0_glm_fltr <- glm(q0 ~ log(Effective_reads)+Source*Location,
              family = poisson(link = "log"),
              data = metadata_with_hill_fltr)

#q1 glm
q1_glm_fltr <- glm(q1 ~ log(Effective_reads)+Source*Location,
              family = poisson(link = "log"),
              data = metadata_with_hill_fltr)
## Warning in dpois(y, mu, log = TRUE): non-integer x = 9.036873
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1145.961220
## Warning in dpois(y, mu, log = TRUE): non-integer x = 47.874948
## Warning in dpois(y, mu, log = TRUE): non-integer x = 887.566616
## Warning in dpois(y, mu, log = TRUE): non-integer x = 447.167799
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1317.872289
## Warning in dpois(y, mu, log = TRUE): non-integer x = 375.937389
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1212.407272
## Warning in dpois(y, mu, log = TRUE): non-integer x = 114.546532
## Warning in dpois(y, mu, log = TRUE): non-integer x = 689.583452
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1236.713620
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.775571
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1141.539572
## Warning in dpois(y, mu, log = TRUE): non-integer x = 198.051965
## Warning in dpois(y, mu, log = TRUE): non-integer x = 649.434099
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1033.757810
## Warning in dpois(y, mu, log = TRUE): non-integer x = 21.120060
## Warning in dpois(y, mu, log = TRUE): non-integer x = 236.465478
## Warning in dpois(y, mu, log = TRUE): non-integer x = 470.600863
## Warning in dpois(y, mu, log = TRUE): non-integer x = 40.056484
## Warning in dpois(y, mu, log = TRUE): non-integer x = 191.591875
## Warning in dpois(y, mu, log = TRUE): non-integer x = 515.230864
## Warning in dpois(y, mu, log = TRUE): non-integer x = 118.919348
## Warning in dpois(y, mu, log = TRUE): non-integer x = 286.196272
## Warning in dpois(y, mu, log = TRUE): non-integer x = 521.533059
## Warning in dpois(y, mu, log = TRUE): non-integer x = 21.243804
## Warning in dpois(y, mu, log = TRUE): non-integer x = 483.242102
## Warning in dpois(y, mu, log = TRUE): non-integer x = 12.992440
## Warning in dpois(y, mu, log = TRUE): non-integer x = 76.214330
## Warning in dpois(y, mu, log = TRUE): non-integer x = 502.299672
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.362994
## Warning in dpois(y, mu, log = TRUE): non-integer x = 406.547029
## Warning in dpois(y, mu, log = TRUE): non-integer x = 610.775331
## Warning in dpois(y, mu, log = TRUE): non-integer x = 456.279074
## Warning in dpois(y, mu, log = TRUE): non-integer x = 631.344825
## Warning in dpois(y, mu, log = TRUE): non-integer x = 625.359839
## Warning in dpois(y, mu, log = TRUE): non-integer x = 29.302276
## Warning in dpois(y, mu, log = TRUE): non-integer x = 99.082917
## Warning in dpois(y, mu, log = TRUE): non-integer x = 384.027703
## Warning in dpois(y, mu, log = TRUE): non-integer x = 160.920694
## Warning in dpois(y, mu, log = TRUE): non-integer x = 600.653719
## Warning in dpois(y, mu, log = TRUE): non-integer x = 667.285427
## Warning in dpois(y, mu, log = TRUE): non-integer x = 279.227229
## Warning in dpois(y, mu, log = TRUE): non-integer x = 9.358681
## Warning in dpois(y, mu, log = TRUE): non-integer x = 55.066246
## Warning in dpois(y, mu, log = TRUE): non-integer x = 141.925035
## Warning in dpois(y, mu, log = TRUE): non-integer x = 15.144746
## Warning in dpois(y, mu, log = TRUE): non-integer x = 110.564833
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.028120
## Warning in dpois(y, mu, log = TRUE): non-integer x = 12.557778
## Warning in dpois(y, mu, log = TRUE): non-integer x = 211.697465
## Warning in dpois(y, mu, log = TRUE): non-integer x = 16.425755
## Warning in dpois(y, mu, log = TRUE): non-integer x = 351.503682
## Warning in dpois(y, mu, log = TRUE): non-integer x = 14.579458
## Warning in dpois(y, mu, log = TRUE): non-integer x = 250.463366
## Warning in dpois(y, mu, log = TRUE): non-integer x = 444.833675
## Warning in dpois(y, mu, log = TRUE): non-integer x = 124.745022
## Warning in dpois(y, mu, log = TRUE): non-integer x = 94.655639
## Warning in dpois(y, mu, log = TRUE): non-integer x = 305.856635
## Warning in dpois(y, mu, log = TRUE): non-integer x = 145.462631
## Warning in dpois(y, mu, log = TRUE): non-integer x = 177.480033
## Warning in dpois(y, mu, log = TRUE): non-integer x = 52.411210
## Warning in dpois(y, mu, log = TRUE): non-integer x = 516.220144
## Warning in dpois(y, mu, log = TRUE): non-integer x = 288.582443
## Warning in dpois(y, mu, log = TRUE): non-integer x = 398.115993
## Warning in dpois(y, mu, log = TRUE): non-integer x = 668.236422
## Warning in dpois(y, mu, log = TRUE): non-integer x = 486.264725
## Warning in dpois(y, mu, log = TRUE): non-integer x = 321.527670
## Warning in dpois(y, mu, log = TRUE): non-integer x = 531.867537
## Warning in dpois(y, mu, log = TRUE): non-integer x = 747.886370
## Warning in dpois(y, mu, log = TRUE): non-integer x = 769.539696
## Warning in dpois(y, mu, log = TRUE): non-integer x = 510.480594
#q2 glm
q2_glm_fltr <- glm(q2 ~ log(Effective_reads)+Source*Location,
              family = poisson(link = "log"),
              data = metadata_with_hill_fltr)
## Warning in dpois(y, mu, log = TRUE): non-integer x = 3.171788
## Warning in dpois(y, mu, log = TRUE): non-integer x = 576.966109
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.350837
## Warning in dpois(y, mu, log = TRUE): non-integer x = 436.200764
## Warning in dpois(y, mu, log = TRUE): non-integer x = 145.072574
## Warning in dpois(y, mu, log = TRUE): non-integer x = 723.411536
## Warning in dpois(y, mu, log = TRUE): non-integer x = 153.597535
## Warning in dpois(y, mu, log = TRUE): non-integer x = 666.541403
## Warning in dpois(y, mu, log = TRUE): non-integer x = 34.456157
## Warning in dpois(y, mu, log = TRUE): non-integer x = 178.728096
## Warning in dpois(y, mu, log = TRUE): non-integer x = 676.765401
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.375549
## Warning in dpois(y, mu, log = TRUE): non-integer x = 617.084909
## Warning in dpois(y, mu, log = TRUE): non-integer x = 14.482863
## Warning in dpois(y, mu, log = TRUE): non-integer x = 234.062638
## Warning in dpois(y, mu, log = TRUE): non-integer x = 441.625630
## Warning in dpois(y, mu, log = TRUE): non-integer x = 9.057934
## Warning in dpois(y, mu, log = TRUE): non-integer x = 62.632737
## Warning in dpois(y, mu, log = TRUE): non-integer x = 216.098156
## Warning in dpois(y, mu, log = TRUE): non-integer x = 14.800651
## Warning in dpois(y, mu, log = TRUE): non-integer x = 43.314536
## Warning in dpois(y, mu, log = TRUE): non-integer x = 139.622390
## Warning in dpois(y, mu, log = TRUE): non-integer x = 36.762232
## Warning in dpois(y, mu, log = TRUE): non-integer x = 79.269153
## Warning in dpois(y, mu, log = TRUE): non-integer x = 162.051399
## Warning in dpois(y, mu, log = TRUE): non-integer x = 9.485131
## Warning in dpois(y, mu, log = TRUE): non-integer x = 170.638980
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.551297
## Warning in dpois(y, mu, log = TRUE): non-integer x = 20.942091
## Warning in dpois(y, mu, log = TRUE): non-integer x = 127.678843
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.524882
## Warning in dpois(y, mu, log = TRUE): non-integer x = 122.819172
## Warning in dpois(y, mu, log = TRUE): non-integer x = 209.071791
## Warning in dpois(y, mu, log = TRUE): non-integer x = 103.936882
## Warning in dpois(y, mu, log = TRUE): non-integer x = 278.567311
## Warning in dpois(y, mu, log = TRUE): non-integer x = 168.692594
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.734694
## Warning in dpois(y, mu, log = TRUE): non-integer x = 30.701974
## Warning in dpois(y, mu, log = TRUE): non-integer x = 167.217679
## Warning in dpois(y, mu, log = TRUE): non-integer x = 19.685820
## Warning in dpois(y, mu, log = TRUE): non-integer x = 210.245304
## Warning in dpois(y, mu, log = TRUE): non-integer x = 284.919159
## Warning in dpois(y, mu, log = TRUE): non-integer x = 46.476316
## Warning in dpois(y, mu, log = TRUE): non-integer x = 3.680969
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.637827
## Warning in dpois(y, mu, log = TRUE): non-integer x = 30.473794
## Warning in dpois(y, mu, log = TRUE): non-integer x = 3.983116
## Warning in dpois(y, mu, log = TRUE): non-integer x = 33.873471
## Warning in dpois(y, mu, log = TRUE): non-integer x = 3.263018
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.779628
## Warning in dpois(y, mu, log = TRUE): non-integer x = 53.277613
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.602524
## Warning in dpois(y, mu, log = TRUE): non-integer x = 54.393270
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.950724
## Warning in dpois(y, mu, log = TRUE): non-integer x = 52.948584
## Warning in dpois(y, mu, log = TRUE): non-integer x = 151.370352
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.340366
## Warning in dpois(y, mu, log = TRUE): non-integer x = 20.849351
## Warning in dpois(y, mu, log = TRUE): non-integer x = 48.667022
## Warning in dpois(y, mu, log = TRUE): non-integer x = 23.718140
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.618940
## Warning in dpois(y, mu, log = TRUE): non-integer x = 16.535976
## Warning in dpois(y, mu, log = TRUE): non-integer x = 95.808905
## Warning in dpois(y, mu, log = TRUE): non-integer x = 54.011247
## Warning in dpois(y, mu, log = TRUE): non-integer x = 83.060795
## Warning in dpois(y, mu, log = TRUE): non-integer x = 238.121940
## Warning in dpois(y, mu, log = TRUE): non-integer x = 164.033797
## Warning in dpois(y, mu, log = TRUE): non-integer x = 53.685546
## Warning in dpois(y, mu, log = TRUE): non-integer x = 81.619451
## Warning in dpois(y, mu, log = TRUE): non-integer x = 255.500307
## Warning in dpois(y, mu, log = TRUE): non-integer x = 347.159348
## Warning in dpois(y, mu, log = TRUE): non-integer x = 90.468592

glm quasipoisson

#q0 glm quasipoisson
q0_glm_quasipoisson_fltr <- glm(q0 ~ log(Effective_reads)+Source*Location,
              family = quasipoisson(link = "log"),
              data = metadata_with_hill_fltr)

#q1 glm quasipoisson
q1_glm_quasipoisson_fltr <- glm(q1 ~ log(Effective_reads)+Source*Location,
              family = quasipoisson(link = "log"),
              data = metadata_with_hill_fltr)

#q2 glm quasipoisson
q2_glm_quasipoisson_fltr <- glm(q1 ~ log(Effective_reads)+Source*Location,
              family = quasipoisson(link = "log"),
              data = metadata_with_hill_fltr)

11.1.6.3 Fitting Generalized Linear Mixed-Effects Models

glmer

#q0 glmer
q0_glmer_fltr <- glmer(q0 ~ log(Effective_reads)+Source*Location+(1|Specimen),
             family = poisson(link = "log"),
             data = metadata_with_hill_fltr)

#q1 glmer
q1_glmer_fltr <- glmer(q1 ~ log(Effective_reads)+Source*Location+(1|Specimen),
             family = poisson(link = "log"),
             data = metadata_with_hill_fltr)

#q2 glmer
q2_glmer_fltr <- glmer(q2 ~ log(Effective_reads)+Source*Location+(1|Specimen),
             family = poisson(link = "log"),
             data = metadata_with_hill_fltr)

11.1.6.4 Fitting Generalized Linear Mixed-Effects Models by Maximum Likelihood

glmmTMB

# glmmTMB
# q0
q0_glmmTMB_fltr <- glmmTMB(q0 ~ log(Effective_reads)+Source*Location + (1 | Specimen),
    family = poisson(link = "log"),
    data = metadata_with_hill_fltr,
    dispformula = ~ 1)  

# q1
q1_glmmTMB_fltr <- glmmTMB(q1 ~ log(Effective_reads)+Source*Location + (1 | Specimen),
    family = poisson(link = "log"),
    data = metadata_with_hill_fltr,
    dispformula = ~ 1)
## Warning in glmmTMB(q1 ~ log(Effective_reads) + Source * Location + (1 | :
## non-integer counts in a poisson model
# q2
q2_glmmTMB_fltr <- glmmTMB(q2 ~ log(Effective_reads)+Source*Location + (1 | Specimen),
    family = poisson(link = "log"),
    data = metadata_with_hill_fltr,
    dispformula = ~ 1)
## Warning in glmmTMB(q2 ~ log(Effective_reads) + Source * Location + (1 | :
## non-integer counts in a poisson model

glmmTMB negative binomial

# glmmTMB
# q0
q0_glmmTMB_negbinml_fltr <- glmmTMB(q0 ~ log(Effective_reads) + Source * Location + (1 | Specimen),
                          family = nbinom2(link = "log"),
                          data = metadata_with_hill_fltr,
                          dispformula = ~ 1)

# q1
q1_glmmTMB_negbinml_fltr <- glmmTMB(q1 ~ log(Effective_reads) + Source * Location + (1 | Specimen),
                          family = nbinom2(link = "log"),
                          data = metadata_with_hill_fltr,
                          dispformula = ~ 1)
## Warning in glmmTMB(q1 ~ log(Effective_reads) + Source * Location + (1 | :
## non-integer counts in a nbinom2 model
# q2
q2_glmmTMB_negbinml_fltr <- glmmTMB(q2 ~ log(Effective_reads) + Source * Location + (1 | Specimen),
                          family = nbinom2(link = "log"),
                          data = metadata_with_hill_fltr,
                          dispformula = ~ 1)
## Warning in glmmTMB(q2 ~ log(Effective_reads) + Source * Location + (1 | :
## non-integer counts in a nbinom2 model

11.1.6.5 Compare models

calculate_AIC_quasiPoisson <- function(model) {
  residuals <- residuals(model, type = "pearson")
  deviance <- sum(residuals^2)
  n <- length(residuals)
  logLikelihood <- -0.5 * (n * log(2 * pi * deviance / n) + deviance)
  k <- length(coef(model))
  AIC_value <- 2 * k - 2 * logLikelihood
  return(AIC_value)
}
q012_gmodels_comparison_fltr <- data.frame(
  Model = c("LME q0", "GLM Poisson q0", "GLM Quasi-Poisson q0", "GLMER q0", "GLMMTMB q0", "GLMMTMB_bn q0",
            "LME q1", "GLM Poisson q1", "GLM Quasi-Poisson q1", "GLMER q1", "GLMMTMB q1","GLMMTMB_bn q1",
            "LME q2", "GLM Poisson q2", "GLM Quasi-Poisson q2", "GLMER q2", "GLMMTMB q2","GLMMTMB_bn q2"),
  AIC = c(AIC(q0_lme_fltr), AIC(q0_glm_fltr), calculate_AIC_quasiPoisson(q0_glm_quasipoisson_fltr), AIC(q0_glmer_fltr), AIC(q0_glmmTMB_fltr), AIC(q0_glmmTMB_negbinml_fltr),
          AIC(q1_lme_fltr), AIC(q1_glm_fltr), calculate_AIC_quasiPoisson(q1_glm_quasipoisson_fltr), AIC(q1_glmer_fltr), AIC(q1_glmmTMB_fltr), AIC(q1_glmmTMB_negbinml_fltr),
          AIC(q2_lme_fltr), AIC(q2_glm_fltr), calculate_AIC_quasiPoisson(q2_glm_quasipoisson_fltr), AIC(q2_glmer_fltr), AIC(q2_glmmTMB_fltr), AIC(q2_glmmTMB_negbinml_fltr)),
  BIC = c(BIC(q0_lme_fltr), BIC(q0_glm_fltr), BIC(q0_glm_quasipoisson_fltr), BIC(q0_glmer_fltr), BIC(q0_glmmTMB_fltr), BIC(q0_glmmTMB_negbinml_fltr),
          BIC(q1_lme_fltr), BIC(q1_glm_fltr), BIC(q1_glm_quasipoisson_fltr), BIC(q1_glmer_fltr), BIC(q1_glmmTMB_fltr), BIC(q1_glmmTMB_negbinml_fltr),
          BIC(q2_lme_fltr), BIC(q2_glm_fltr), BIC(q2_glm_quasipoisson_fltr), BIC(q2_glmer_fltr), BIC(q2_glmmTMB_fltr), BIC(q2_glmmTMB_negbinml_fltr))
  
  
)

# show
q012_gmodels_comparison_fltr

11.1.6.6 LME ckeck

The best AIC/BIC values are with lme, so, I will check lme model assumptions

Effect

# extract p-values fun
extract_pvalues <- function(perm_test_result) {
  perm_test_result$resultats$p.value  
}

# extract
lme_pvalues_q0_fltr <- extract_pvalues(q0_lme_perm_fltr)
lme_pvalues_q1_fltr <- extract_pvalues(q1_lme_perm_fltr)
lme_pvalues_q2_fltr <- extract_pvalues(q2_lme_perm_fltr)

# Dataframe  p-values
library(dplyr)

lme_pvalues_table_fltr <- data.frame(
  Hill = c("q0", "q1", "q2"),
  `Intercept` = c(lme_pvalues_q0_fltr[1], lme_pvalues_q1_fltr[1], lme_pvalues_q2_fltr[1]),
  `log(Depth)` = c(lme_pvalues_q0_fltr[2], lme_pvalues_q1_fltr[2], lme_pvalues_q2_fltr[2]),
  `Source` = c(lme_pvalues_q0_fltr[3], lme_pvalues_q1_fltr[3], lme_pvalues_q2_fltr[3]),
  `Location` = c(lme_pvalues_q0_fltr[4], lme_pvalues_q1_fltr[4], lme_pvalues_q2_fltr[4]),
  `Source:Location` = c(lme_pvalues_q0_fltr[5], lme_pvalues_q1_fltr[5], lme_pvalues_q2_fltr[5])
)

# table
knitr::kable(t(lme_pvalues_table_fltr), caption = "P-values from Permuted LME")
P-values from Permuted LME
Hill q0 q1 q2
Intercept 0.02 0.00 0.01
log.Depth. 0.09 0.00 0.03
Source 0 0 0
Location 0 0 0
Source.Location 0.03 0.00 0.00
# save 
write.csv(t(lme_pvalues_table_fltr), "results/03.Diversity/P-values_from_Permuted_LME_fltr.csv", row.names = FALSE)

Ok, aquí parece que a excepción de log(Effective reads) Source, Location y la interacción tienen un efecto significativo en la diversidad q0, q1 y q2.

Evaluemos los supuestos:

Homocedasticidad

bptest, solo funciona para lm no para lme :(

#Homocedasticidad p-value >0.05 si cumple supuesto de homocedasticidad

library(lmtest)
# q0
Homoce_lme_q0_fltr <- bptest(q0_lme_fltr)
Homoce_lme_q0_fltr

# q1
Homoce_lme_q1_fltr <- bptest(q1_lme_fltr)
Homoce_lme_q1_fltr

# q2
Homoce_lme_q2_fltr <- bptest(q2_lme_fltr)
Homoce_lme_q2_fltr
q0_lme_perm_fltr
## 
## Monte-Carlo test
## 
## Call: 
## PermTest.lme(obj = q0_lme_fltr, B = 100)
## 
## Based on 100 replicates
## Simulated p-value:
##                      p.value
## (Intercept)             0.02
## log(Effective_reads)    0.09
## Source                  0.00
## Location                0.00
## Source:Location         0.03

Check with easystats

q=0

#easystats::install_suggested()
library(easystats)
## # Attaching packages: easystats 0.7.1 (red = needs update)
## ✔ bayestestR  0.13.2    ✔ correlation 0.8.4  
## ✔ datawizard  0.10.0    ✔ effectsize  0.8.7  
## ✔ insight     0.19.10   ✔ modelbased  0.8.7  
## ✔ performance 0.11.0    ✔ parameters  0.21.6 
## ✔ report      0.5.8     ✖ see         0.8.3  
## 
## Restart the R-Session and update packages with `easystats::easystats_update()`.
#Remember model # q0_lme_fltr <- lme(q0 ~ log(Effective_reads)+Source*Location, random=~1 |Specimen, data = metadata_with_hill_fltr)
check_q0_lme_1 <- check_model(q0_lme_fltr)
## Converting missing values (`NA`) into regular values currently not
##   possible for variables of class `NULL`.
print(check_q0_lme_1)

check_q0_lme_2 <- check_model(update(q0_lme_fltr, .~. -Source:Location))
## Converting missing values (`NA`) into regular values currently not
##   possible for variables of class `NULL`.
print(check_q0_lme_2)

Al eliminar la interacción mejoró la colinealidad y la normalidad y linearidad se mantuvieron adecuadas. Me quedo entonces con este modelo ajustado

q=1

check_q1_lme_1 <- check_model(q1_lme_fltr)
## Converting missing values (`NA`) into regular values currently not
##   possible for variables of class `NULL`.
check_q1_lme_1

check_q1_lme_2 <- check_model(update(q1_lme_fltr, .~. -Source:Location))
## Converting missing values (`NA`) into regular values currently not
##   possible for variables of class `NULL`.
print(check_q1_lme_2)

Pues realmente, no mejoró mucho la linealidad, asi que seguiré con el modelo que después de lme dió mejores valores de AIC/BIC.

q=2

check_q2_lme_1 <- check_model(q2_lme_fltr)
## Converting missing values (`NA`) into regular values currently not
##   possible for variables of class `NULL`.
check_q2_lme_1

Pff este si estuvo muy cónico, next!

Checar que onda con esto:

#q0
alpha<- metadata_with_hill %>% unite("interact", c("Source", "Location"), remove = F)

q0_lme<-lme(q0~ interact, random=~1 |Specimen, data = alpha)
q0_lme_means<-emmeans(q0_lme, pairwise ~ interact)
multcomp::cld(object = q0_lme_means$emmeans,
              Letters = letters)

11.2 Core

11.2.1 Get Core Hill numbers

library(hilldiv)
library(tidyverse)
library(kableExtra)

# Get hill numbers
q0_c <- hill_div(otu_table_full_core, qvalue = 0) 
q1_c <- hill_div(otu_table_full_core, qvalue = 1) 
q2_c <- hill_div(otu_table_full_core, qvalue = 2)

# Merge metadata with Hill numbers
q012_c <- cbind(q0_c, q1_c, q2_c) %>% as.data.frame() %>% rownames_to_column(var = "sample")
metadata_with_hill_core <- q012_c %>%                                      
  inner_join(meta_full_core, by = c("sample"="sample"))

#save table
write.table(metadata_with_hill_core, "results/03.Diversity/Metadata_with_hill_core.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)

# show
metadata_with_hill_core %>% head() %>% kable()
sample q0_c q1_c q2_c Source Date Year Location Week Raw.reads Colect_number Specimen quant_reading Effective_reads ASVs
DR1014-E 33 5.937934 3.364797 Stipe sep-30 2021 Monte obscuro Emiliano Zapata 1 159896 DRamos 1014 Ind_1 948 68295 135
DR1014-SAH 144 89.472107 62.663860 Soil associated sep-30 2021 Monte obscuro Emiliano Zapata 1 223804 DRamos 1014 Ind_1 3071 40974 2118
DR1018-E 122 6.037923 2.004335 Stipe oct-07 2021 Monte obscuro Emiliano Zapata 2 68578 DRamos 1018 Ind_3 97 23890 768
DR1018-SAH 153 91.533182 64.969780 Soil associated oct-07 2021 Monte obscuro Emiliano Zapata 2 211120 DRamos 1018 Ind_3 4017 38161 1824
DR1019-E 122 61.454388 33.429425 Stipe oct-07 2021 Monte obscuro Emiliano Zapata 2 144270 DRamos 1019 Ind_4 215 33853 1012
DR1019-SAH 153 96.430497 66.569922 Soil associated oct-07 2021 Monte obscuro Emiliano Zapata 2 185380 DRamos 1019 Ind_4 3754 34576 2202

11.2.2 Explore Core Hill distribution

Get means

# Reorder q values  
meta_qs_c <- metadata_with_hill_core %>%
    pivot_longer(cols = q0_c:q2_c, names_to = "q", values_to = "value") %>%
    filter(q %in% c("q0_c", "q1_c", "q2_c")) %>%
    mutate(
        qs = case_when(
            q == "q0_c" ~ "q=0",
            q == "q1_c" ~ "q=1",
            q == "q2_c" ~ "q=2"
        ))

#Get means of Hill numbers
means_c <- meta_qs_c %>% group_by(Source, Location, qs) %>%
  summarise(means = mean(value, na.rm = TRUE),
            sd = sd(value, na.rm = TRUE),
            .groups = 'drop')

#save table
write.table(means_c, "results/03.Diversity/Hill_means_sd_core.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)

print(means_c)
## # A tibble: 27 × 5
##    Source          Location                      qs    means    sd
##    <fct>           <chr>                         <chr> <dbl> <dbl>
##  1 Soil associated El Castillo/Zentla            q=0   234.  22.1 
##  2 Soil associated El Castillo/Zentla            q=1    96.2  8.67
##  3 Soil associated El Castillo/Zentla            q=2    44.4  9.64
##  4 Soil associated Monte obscuro Emiliano Zapata q=0   156.   9.41
##  5 Soil associated Monte obscuro Emiliano Zapata q=1    96.2  6.49
##  6 Soil associated Monte obscuro Emiliano Zapata q=2    66.7  8.14
##  7 Soil associated Naolinco                      q=0   327.  19.2 
##  8 Soil associated Naolinco                      q=1   101.  22.0 
##  9 Soil associated Naolinco                      q=2    38.0 16.4 
## 10 Rhizhomorph     El Castillo/Zentla            q=0   223.  21.0 
## # ℹ 17 more rows

11.2.3 Plot Core Hill distribution

library(ggpubr) 

# factor to reorder plot
metadata_with_hill_core$Location <- factor(metadata_with_hill_core$Location)
metadata_with_hill_core$Compartment  <- factor(metadata_with_hill_core$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))


#plot

hill_barplot_explore_core <- metadata_with_hill_core %>%
  pivot_longer(cols = q0_c:q2_c, names_to = "q", values_to = "value") %>%
    filter(q %in% c("q0_c", "q1_c", "q2_c")) %>%
    mutate(
        qs = case_when(
            q == "q0_c" ~ "q=0",
            q == "q1_c" ~ "q=1",
            q == "q2_c" ~ "q=2"
        )) %>%
  ggbarplot(
    x = "Compartment",
    y = "value",
    add = "mean_se",
    facet.by = c("qs", "Location"),
    fill = "Compartment"
  ) +
  scale_fill_jco() +
  geom_jitter(size = 0.8, position = position_jitter(width = 0.1)) +
  facet_grid(rows = vars(qs), cols = vars(Location), scales = "free_y") +
  theme(
    strip.background =  element_blank(), #element_rect(fill = "")
    strip.text.x = element_text(face= "bold", size = 13, margin = margin(0.5, 0, 0.5, 0)),
    strip.text.y = element_text(face= "bold", size = 14, angle = 0, margin = margin(0, 0.5, 0, 0.5)),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.line = element_line(colour = "black"),
    #panel.border = element_blank(),
    panel.spacing.x = unit(0.5, "lines"),
    panel.background = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_text(colour = "black", size = 11),
    legend.position = "bottom"
  ) +
  labs(
    fill = "Compartment",
    y = "",
    x = "", #,
    title = "Biodiversity across Locations and Compartments of Core Microbiota"
  ) + #+
  #theme(plot.title = element_text(hjust = 0.5))
   theme(legend.text = element_text(size = 12)) #+
  #stat_summary(fun = mean, geom = "line", aes(group = Compartment), color = "red")

hill_barplot_explore_core

#ggsave("results/plots/02.diversity/02.Hill_diversity_across_Locations_and_Compartments_fltr.png", hill_barplot_explore_fltr, width = 11, height = 11)

11.3 Beta diversity Diversity

Prepare data

Explore

aldex_clr_data <- t(getMonteCarloSample(aldex_clr,1))
pca <- prcomp(aldex_clr_data)

meta=metadata_with_hill_fltr %>% dplyr::rename(SampleID="sample-id")

11.3.1 Full

11.3.1.1 clr manual

library(ggplot2)
library(tidyverse)
library(ggsci)
pca_plot<- ggplot() +
      geom_segment(data=data.frame(pca$rotation) %>%   #arrows
                   rownames_to_column(var = "Feature.ID")%>%  
                   mutate(a=sqrt(PC1^2+PC2^2)) %>% # calculate the distance from the origin
                   top_n(8, a) %>% #keep 10 furthest away points
                   mutate(PC1=PC1*50, PC2=PC2*50),
                 aes(x=0, xend=PC1, y=0, yend=PC2),
                 arrow = arrow(length = unit(0.3,"cm")))+
    geom_point(data=data.frame(pca$x) %>% #individuals
                 rownames_to_column(var = "SampleID")%>%
                 left_join(meta, by = "SampleID"),
               aes(x=PC1, y=PC2, fill=Source),shape=21, size=2) +
    geom_vline(xintercept = 0, linetype = 2) +   #lines-cross
    geom_hline(yintercept = 0, linetype = 2) +
  #  theme_linedraw()+
    scale_fill_jco()+#color of points 
    scale_color_jco()+#color of points 
    theme(axis.text = element_text(colour = "black", size = 12),
          axis.title = element_text(colour = "black", size = 12),
          legend.text = element_text(size = 10),
          legend.title = element_text(size = 12), 
          legend.position = "right", 
          legend.box = "vertical") +
    geom_polygon(data=data.frame(pca$x) %>% #individuals
                   rownames_to_column(var = "SampleID")%>%
                   left_join(meta, by = "SampleID")%>%
                   drop_na() %>%
                  group_by(Location) %>% 
                   slice(chull(PC1, PC2)),
                 aes(x=PC1, y=PC2, fill=Source, color=Source),
                 alpha = 0.3,
                 show.legend = FALSE)+
  ylab("PC2")+xlab("PC1")+ theme_bw()
  #  ggrepel::geom_label_repel(data=data.frame(pca$rotation) %>%   #arrows
   # rownames_to_column(var = "Feature.ID")%>%  
    #mutate(a=sqrt(PC1^2+PC2^2)) %>% # calculate the distance from the origin
    #top_n(8, a) %>% #keep 10 furthest away points
    #mutate(PC1=PC1*10, PC2=PC2*10)%>% left_join(
     # taxonomys)%>% dplyr::select(
      #Taxon, PC1, PC2, Feature.ID)%>%
      #mutate_at(
       # c("Taxon"), ~str_replace(.,";s__unidentified", "")) %>% mutate(
     #   tax= str_extract(Taxon, "[^__]+$")) %>%
      #mutate_at(c("tax"), funs(tax = case_when(
       # tax=="Fungi" ~ "Unidentified",
        #tax=="sajor_caju" ~ "Lentinus",
        #TRUE~as.character(tax)))),
    #aes(x=PC1, y=PC2, label= tax),
    #segment.colour = NA, col = 'black', fill= "#EEEEEE",
    #fontface="italic",  box.padding = 0.2, size=4)
pca_plot

library(ggplot2)
library(dplyr)
library(tidyr)



# Reorder
meta$Source <- factor(meta$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))

# plot
pca_plot <- ggplot(data = data.frame(pca$x) %>%
                    rownames_to_column(var = "SampleID") %>%
                    left_join(meta, by = "SampleID")) +
  geom_segment(data = data.frame(pca$rotation) %>%
                rownames_to_column(var = "Feature.ID") %>%
                mutate(a = sqrt(PC1^2 + PC2^2)) %>%
                top_n(8, a) %>%
                mutate(PC1 = PC1*50, PC2 = PC2*50),
                aes(x = 0, xend = PC1, y = 0, yend = PC2),
                arrow = arrow(length = unit(0.01,"cm"))) +
  geom_point(aes(x = PC1, y = PC2, color = Source, shape = Location), size = 3) +
  geom_vline(xintercept = 0, linetype = 2) +
  geom_hline(yintercept = 0, linetype = 2) +
  scale_color_jco() + 
  scale_shape_manual(values = c(15, 16, 17)) + # Ajusta los valores al número de categorías de 'Location'
  scale_fill_jco() +
  theme(axis.text = element_text(colour = "black", size = 12),
        axis.title = element_text(colour = "black", size = 12),
        legend.text = element_text(size = 10),
        legend.title = element_text(size = 12), 
        legend.position = "right", 
        legend.box = "vertical") +
  labs(y = "PC2", x = "PC1", color = "Source", shape = "Location") +
   theme_bw()
  # ggrepel::geom_label_repel(data=data.frame(pca$rotation) %>%   #arrows
  #   rownames_to_column(var = "Feature.ID")%>%  
  #   mutate(a=sqrt(PC1^2+PC2^2)) %>% # calculate the distance from the origin
  #   top_n(8, a) %>% #keep 10 furthest away points
  #   mutate(PC1=PC1*10, PC2=PC2*10)%>% left_join(
  #     taxonomys)%>% dplyr::select(
  #     Taxon, PC1, PC2, Feature.ID)%>%
  #     mutate_at(
  #       c("Taxon"), ~str_replace(.,";s__unidentified", "")) %>% mutate(
  #       tax= str_extract(Taxon, "[^__]+$")) %>%
  #     mutate_at(c("tax"), funs(tax = case_when(
  #       tax=="Fungi" ~ "Unidentified",
  #       tax=="sajor_caju" ~ "Lentinus",
  #       TRUE~as.character(tax)))),
  #   aes(x=PC1, y=PC2, label= tax),
  #   segment.colour = NA, col = 'black', fill= "#EEEEEE",
  #   fontface="italic",  box.padding = 0.2, size=4)

pca_plot

nmds_source_clr = vegan::metaMDS(aldex_clr_data, trymax = 20, k = 2) 
var_stress_nmds_clr <- round(nmds_source_clr$stress, 5)
var_stress_nmds_clr

scores_source_clr= nmds_source_clr %>% vegan::scores() 
meta$Source <- factor(meta$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))

nmds_plot_clr<- ggplot() +
      geom_point(data=data.frame(scores_source_clr) %>% #individuals
                 rownames_to_column(var = "SampleID")%>%
                 left_join(meta, by = "SampleID"),
               aes(x=NMDS1, y=NMDS2,  color = Source, shape = Location), size=4) +
  #  theme_linedraw()+
    scale_fill_jco()+#color of points 
    scale_color_jco()+#color of points 
    theme(axis.text = element_text(colour = "black", size = 12),
          axis.title = element_text(colour = "black", size = 12),
          legend.text = element_text(size = 10),
          legend.title = element_text(size = 12), 
          legend.position = "right", 
          legend.box = "vertical")+ theme_bw() +
  ylab("NMDS2")+xlab("NMDS1")

nmds_plot_clr <- nmds_plot_clr +
  annotate("text", x = Inf, y = -Inf, label = paste("Stress:", var_stress_nmds),
           hjust = 3.1, vjust = -1.9, size = 3)
nmds_plot_clr

Me marca un error:

Error in eigen(-x/2, symmetric = TRUE) :
infinite or missing values in 'x'

11.3.1.2 bray

# Bray NMDS bray
library(vegan)
## Loading required package: permute
## 
## Attaching package: 'permute'
## The following object is masked from 'package:devtools':
## 
##     check
## Loading required package: lattice
## This is vegan 2.6-4
## 
## Attaching package: 'vegan'
## The following object is masked from 'package:microbiome':
## 
##     diversity
bray=vegdist(t(otu_table_fltr), method = "bray")
nmds_source_bray = vegan::metaMDS(bray, trymax = 20, k = 2) 
## Run 0 stress 0.173438 
## Run 1 stress 0.2049176 
## Run 2 stress 0.2066556 
## Run 3 stress 0.1748666 
## Run 4 stress 0.1901403 
## Run 5 stress 0.1769892 
## Run 6 stress 0.1968377 
## Run 7 stress 0.1741147 
## Run 8 stress 0.1747265 
## Run 9 stress 0.1756538 
## Run 10 stress 0.1743105 
## Run 11 stress 0.1993007 
## Run 12 stress 0.1849932 
## Run 13 stress 0.20207 
## Run 14 stress 0.1734733 
## ... Procrustes: rmse 0.008022249  max resid 0.06554523 
## Run 15 stress 0.1744183 
## Run 16 stress 0.1899323 
## Run 17 stress 0.2033364 
## Run 18 stress 0.1748666 
## Run 19 stress 0.1769895 
## Run 20 stress 0.1878257 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     18: stress ratio > sratmax
##      2: scale factor of the gradient < sfgrmin
var_stress_nmds_bray <- round(nmds_source_bray$stress, 5)
var_stress_nmds_bray
## [1] 0.17344
scores_source_bray= nmds_source_bray %>% vegan::scores() 
metadata_fltr$Source <- factor(metadata_fltr$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))

nmds_plot_bray<- ggplot() +
      geom_point(data=data.frame(scores_source_bray) %>% #individuals
                 rownames_to_column(var = "sample")%>%
                 left_join(metadata_fltr, by = "sample"),
               aes(x=NMDS1, y=NMDS2,  color = Source, shape = Location), size=4) +
  #  theme_linedraw()+
    scale_fill_jco()+#color of points 
    scale_color_jco()+#color of points 
    theme(axis.text = element_text(colour = "black", size = 12),
          axis.title = element_text(colour = "black", size = 12),
          legend.text = element_text(size = 10),
          legend.title = element_text(size = 12), 
          legend.position = "right", 
          legend.box = "vertical")+ theme_bw() +
  ylab("NMDS2")+xlab("NMDS1")

nmds_plot_bray <- nmds_plot_bray +
  annotate("text", x = Inf, y = -Inf, label = paste("Stress:", var_stress_nmds_bray),
           hjust = 3.1, vjust = -1.9, size = 3)
nmds_plot_bray

11.3.1.3 aitchison

# NMDS aichitson
library(vegan)
pseudocount <- 1
otu_table_fltr_pseudo <- otu_table_fltr + pseudocount
aitch=vegdist(t(otu_table_fltr_pseudo), method = "aitchison")
nmds_source_aitch = vegan::metaMDS(aitch, trymax = 20, k = 2) 
## Run 0 stress 0.1675395 
## Run 1 stress 0.1760583 
## Run 2 stress 0.1760584 
## Run 3 stress 0.2104237 
## Run 4 stress 0.1826827 
## Run 5 stress 0.2180643 
## Run 6 stress 0.1691633 
## Run 7 stress 0.1681098 
## Run 8 stress 0.1702249 
## Run 9 stress 0.2232853 
## Run 10 stress 0.2090027 
## Run 11 stress 0.1750904 
## Run 12 stress 0.1924574 
## Run 13 stress 0.2729239 
## Run 14 stress 0.2672364 
## Run 15 stress 0.1712287 
## Run 16 stress 0.1760503 
## Run 17 stress 0.1682299 
## Run 18 stress 0.1840784 
## Run 19 stress 0.2234649 
## Run 20 stress 0.1846116 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     20: stress ratio > sratmax
var_stress_nmds_aitch <- round(nmds_source_aitch$stress, 5)
var_stress_nmds_aitch
## [1] 0.16754
scores_source_aitch= nmds_source_aitch %>% vegan::scores() 
metadata_fltr$Source <- factor(metadata_fltr$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))

nmds_plot_aitch<- ggplot() +
      geom_point(data=data.frame(scores_source_aitch) %>% #individuals
                 rownames_to_column(var = "sample")%>%
                 left_join(metadata_fltr, by = "sample"),
               aes(x=NMDS1, y=NMDS2,  color = Source, shape = Location), size=4) +
  #  theme_linedraw()+
    scale_fill_jco()+#color of points 
    scale_color_jco()+#color of points 
    theme(axis.text = element_text(colour = "black", size = 12),
          axis.title = element_text(colour = "black", size = 12),
          legend.text = element_text(size = 10),
          legend.title = element_text(size = 12), 
          legend.position = "right", 
          legend.box = "vertical")+ theme_bw() +
  ylab("NMDS2")+xlab("NMDS1")

nmds_plot_aitch <- nmds_plot_aitch +
  annotate("text", x = Inf, y = -Inf, label = paste("Stress:", var_stress_nmds_aitch),
           hjust = 3.1, vjust = -1.9, size = 3)
nmds_plot_aitch

library(vegan)
bray_dist =vegdist(t(otu_table_fltr), method = "bray")
pcoas=wcmdscale(bray_dist, eig = T)

pcoa_plot<- ggplot() +
      geom_point(data=data.frame(pcoas$points) %>% #individuals
                 rownames_to_column(var = "SampleID")%>%
                 left_join(meta, by = "SampleID"),
               aes(x=Dim1, y=Dim2,color = Source, shape = Location ), size=4) +
    #geom_vline(xintercept = 0, linetype = 2) +   #lines-cross
    #geom_hline(yintercept = 0, linetype = 2) +
  #  theme_linedraw()+
    scale_fill_jco()+#color of points 
    scale_color_jco()+#color of points 
    theme(axis.text = element_text(colour = "black", size = 12),
          axis.title = element_text(colour = "black", size = 12),
          legend.text = element_text(size = 10),
          legend.title = element_text(size = 12), 
          legend.position = "right", 
          legend.box = "vertical")+ theme_bw()+
    #geom_polygon(data=data.frame(pca$x) %>% #individuals
                   #rownames_to_column(var = "SampleID")%>%
                   #left_join(metadata, by = "SampleID")%>%
                   #drop_na() %>%
                   #group_by(Type_of_soil) %>% 
                   #slice(chull(PC1, PC2)),
                 #aes(x=PC1, y=PC2, fill=Type_of_soil, color=Type_of_soil),
                 #alpha = 0.3,
                 #show.legend = FALSE)+
  ylab("PCoA2")+xlab("PCoA1")
pcoa_plot

aitchison independientes por loction

library(vegan)
library(ggplot2)
library(dplyr)
library(tidyr)
library(cowplot)  # Cargar la librería para combinar gráficos

# Agregar pseudocount y calcular la distancia de Aitchison para cada 'Location'
pseudocount <- 1
otu_table_fltr_pseudo <- otu_table_fltr + pseudocount

# Crear una lista para guardar los resultados de NMDS y los valores de estrés por 'Location'
nmds_list <- list()
stress_values <- c()

# Realizar NMDS para cada 'Location' y guardar los resultados en la lista
unique_locations <- unique(meta$Location)

for (loc in unique_locations) {
  # Filtrar la tabla OTU por 'Location'
  otu_subset <- otu_table_fltr_pseudo[, meta$Location == loc, drop=FALSE]
  aitch <- vegdist(t(otu_subset), method = "aitchison")
  
  # Ejecutar NMDS
  nmds_result <- metaMDS(aitch, trymax = 20, k = 3)
  
  # Guardar los resultados y valores de estrés en la lista
  nmds_list[[loc]] <- nmds_result
  stress_values[loc] <- nmds_result$stress
}
## Run 0 stress 0.09286269 
## Run 1 stress 0.09286263 
## ... New best solution
## ... Procrustes: rmse 0.001127248  max resid 0.002186399 
## ... Similar to previous best
## Run 2 stress 0.09017429 
## ... New best solution
## ... Procrustes: rmse 0.043704  max resid 0.1242266 
## Run 3 stress 0.09017437 
## ... Procrustes: rmse 0.0000480378  max resid 0.0001201519 
## ... Similar to previous best
## Run 4 stress 0.1015067 
## Run 5 stress 0.09286238 
## Run 6 stress 0.1020582 
## Run 7 stress 0.1015077 
## Run 8 stress 0.09017423 
## ... New best solution
## ... Procrustes: rmse 0.00006679391  max resid 0.0001715491 
## ... Similar to previous best
## Run 9 stress 0.09017468 
## ... Procrustes: rmse 0.0002998592  max resid 0.0007742236 
## ... Similar to previous best
## Run 10 stress 0.1019489 
## Run 11 stress 0.09010485 
## ... New best solution
## ... Procrustes: rmse 0.02879691  max resid 0.08604193 
## Run 12 stress 0.1019491 
## Run 13 stress 0.09010375 
## ... New best solution
## ... Procrustes: rmse 0.0006040018  max resid 0.00175466 
## ... Similar to previous best
## Run 14 stress 0.09286276 
## Run 15 stress 0.09020433 
## ... Procrustes: rmse 0.01953044  max resid 0.05798785 
## Run 16 stress 0.1411114 
## Run 17 stress 0.1019489 
## Run 18 stress 0.09010307 
## ... New best solution
## ... Procrustes: rmse 0.00209473  max resid 0.005696866 
## ... Similar to previous best
## Run 19 stress 0.09017392 
## ... Procrustes: rmse 0.03002276  max resid 0.08942171 
## Run 20 stress 0.09286262 
## *** Best solution repeated 1 times
## Run 0 stress 0.09353877 
## Run 1 stress 0.0769705 
## ... New best solution
## ... Procrustes: rmse 0.0815555  max resid 0.2052233 
## Run 2 stress 0.07886688 
## Run 3 stress 0.07697121 
## ... Procrustes: rmse 0.0009006034  max resid 0.00377735 
## ... Similar to previous best
## Run 4 stress 0.07697085 
## ... Procrustes: rmse 0.0007500296  max resid 0.003145672 
## ... Similar to previous best
## Run 5 stress 0.0760654 
## ... New best solution
## ... Procrustes: rmse 0.04531549  max resid 0.1757149 
## Run 6 stress 0.07593257 
## ... New best solution
## ... Procrustes: rmse 0.02151344  max resid 0.09614764 
## Run 7 stress 0.07606873 
## ... Procrustes: rmse 0.01869065  max resid 0.07832724 
## Run 8 stress 0.07606475 
## ... Procrustes: rmse 0.02138182  max resid 0.09531475 
## Run 9 stress 0.07606451 
## ... Procrustes: rmse 0.02133414  max resid 0.09503959 
## Run 10 stress 0.07593271 
## ... Procrustes: rmse 0.0006539741  max resid 0.002707354 
## ... Similar to previous best
## Run 11 stress 0.07593296 
## ... Procrustes: rmse 0.0002252735  max resid 0.0009469119 
## ... Similar to previous best
## Run 12 stress 0.08555555 
## Run 13 stress 0.07886705 
## Run 14 stress 0.07606783 
## ... Procrustes: rmse 0.0192141  max resid 0.08196534 
## Run 15 stress 0.07971444 
## Run 16 stress 0.07697084 
## Run 17 stress 0.07593299 
## ... Procrustes: rmse 0.0007989614  max resid 0.003305441 
## ... Similar to previous best
## Run 18 stress 0.07697085 
## Run 19 stress 0.07886746 
## Run 20 stress 0.07593299 
## ... Procrustes: rmse 0.0001929159  max resid 0.0007967887 
## ... Similar to previous best
## *** Best solution repeated 4 times
## Run 0 stress 0.09080181 
## Run 1 stress 0.09080182 
## ... Procrustes: rmse 0.00002879506  max resid 0.00008541925 
## ... Similar to previous best
## Run 2 stress 0.09080181 
## ... Procrustes: rmse 0.0000252384  max resid 0.00007395126 
## ... Similar to previous best
## Run 3 stress 0.09080181 
## ... Procrustes: rmse 0.00000736552  max resid 0.00002266562 
## ... Similar to previous best
## Run 4 stress 0.09080184 
## ... Procrustes: rmse 0.00006170891  max resid 0.0001818799 
## ... Similar to previous best
## Run 5 stress 0.09080181 
## ... Procrustes: rmse 0.00001008042  max resid 0.00002703597 
## ... Similar to previous best
## Run 6 stress 0.09080181 
## ... Procrustes: rmse 0.00002341484  max resid 0.0000681311 
## ... Similar to previous best
## Run 7 stress 0.09080181 
## ... Procrustes: rmse 0.000004666692  max resid 0.00001550806 
## ... Similar to previous best
## Run 8 stress 0.09080181 
## ... Procrustes: rmse 0.00002391561  max resid 0.00008072421 
## ... Similar to previous best
## Run 9 stress 0.09080181 
## ... Procrustes: rmse 0.000006583693  max resid 0.0000193473 
## ... Similar to previous best
## Run 10 stress 0.09080181 
## ... Procrustes: rmse 0.000001971398  max resid 0.000006157192 
## ... Similar to previous best
## Run 11 stress 0.1092961 
## Run 12 stress 0.09080181 
## ... Procrustes: rmse 0.0000136451  max resid 0.00003854882 
## ... Similar to previous best
## Run 13 stress 0.09080181 
## ... Procrustes: rmse 0.000009283839  max resid 0.00003650044 
## ... Similar to previous best
## Run 14 stress 0.09080181 
## ... Procrustes: rmse 0.00002347726  max resid 0.00006888059 
## ... Similar to previous best
## Run 15 stress 0.09080181 
## ... Procrustes: rmse 0.00001835347  max resid 0.00006722972 
## ... Similar to previous best
## Run 16 stress 0.09080181 
## ... Procrustes: rmse 0.000001628275  max resid 0.000005246091 
## ... Similar to previous best
## Run 17 stress 0.09080181 
## ... Procrustes: rmse 0.000003733629  max resid 0.00001315245 
## ... Similar to previous best
## Run 18 stress 0.09080181 
## ... Procrustes: rmse 0.00001137739  max resid 0.00003451799 
## ... Similar to previous best
## Run 19 stress 0.09080181 
## ... Procrustes: rmse 0.000003433678  max resid 0.000009049462 
## ... Similar to previous best
## Run 20 stress 0.09080181 
## ... Procrustes: rmse 0.0000238389  max resid 0.00005808449 
## ... Similar to previous best
## *** Best solution repeated 19 times
# Crear una función para generar el gráfico NMDS con stress
plot_nmds <- function(nmds_result, location_name) {
  scores_df <- as.data.frame(scores(nmds_result)) %>%
    rownames_to_column(var = "SampleID") %>%
    left_join(meta, by = "SampleID") %>%
    mutate(Location = location_name)
  
  p <- ggplot(scores_df, aes(x = NMDS1, y = NMDS2, color = Source)) +
    geom_point(size = 4) +
    labs(title = paste("Location:", location_name, "\nStress:", round(stress_values[location_name], 5)),
         x = "NMDS1", y = "NMDS2", color = "Source") +
    theme_bw() +
    scale_color_jco() +
    theme(legend.position = "right", legend.box = "vertical")
  
  return(p)
}

# Aplicar la función a cada resultado NMDS y guardar los plots
nmds_plots <- lapply(names(nmds_list), function(loc) {
  plot_nmds(nmds_list[[loc]], loc)
})

# Usar cowplot para combinar los gráficos
nmds_combined_plot <- plot_grid(plotlist = nmds_plots, ncol = 3, align = 'v')

# Mostrar el gráfico combinado
print(nmds_combined_plot)

# Guardar la gráfica combinada si es necesario
#ggsave("combined_NMDS_plot.png", nmds_combined_plot, width = 12, height =  &#8203;``【oaicite:0】``&#8203;

Permanova

meta_just = data.frame(aldex_clr_data, check.names = F) %>% 
  rownames_to_column(var = "sample-id") %>% inner_join(metadata_with_hill_fltr) 
## Joining with `by = join_by(`sample-id`)`
meta_just = data.frame(t(otu_table_fltr_pseudo), check.names = F) %>% 
  rownames_to_column(var = "sample-id") %>% inner_join(metadata_with_hill_fltr)
## Joining with `by = join_by(`sample-id`)`
permanovas = adonis2(t(otu_table_fltr_pseudo) ~ Source*Location,
                     data=meta_just, method = "aitchison", 
                     permutations = 999, strata = meta_just$Specimen)
permanovas

Permanovas aitchison independientes

library(vegan)
library(dplyr)
library(tidyr)



#  pseudocount
pseudocount <- 1
otu_table_fltr_pseudo <- otu_table_fltr + pseudocount

# Combine
meta_just <- metadata_with_hill_fltr %>%
  dplyr::select(`sample-id`, Location, Source, Specimen) %>%
  inner_join(as.data.frame(t(otu_table_fltr_pseudo)) %>% rownames_to_column(var = "sample-id"), by = "sample-id")

# 
permanova_results <- list()

# 
for (loc in unique(metadata_with_hill_fltr$Location)) {
  meta_just_loc <- meta_just %>%
    filter(Location == loc)
  otu_table_loc <- otu_table_fltr_pseudo[, meta_just_loc$`sample-id`, drop = FALSE]

  aitch_loc <- vegdist(t(otu_table_loc), method = "aitchison")

  permanova_results[[loc]] <- adonis2(aitch_loc ~ Source, 
                                      data = meta_just_loc, 
                                      permutations = 999, 
                                      strata = meta_just_loc$Specimen)
}
permanova_results
## $`Monte obscuro Emiliano Zapata`
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks:  strata 
## Permutation: free
## Number of permutations: 1151
## 
## adonis2(formula = aitch_loc ~ Source, data = meta_just_loc, permutations = 999, strata = meta_just_loc$Specimen)
##          Df SumOfSqs      R2      F Pr(>F)    
## Source    2    20333 0.25427 2.2163  0.001 ***
## Residual 13    59635 0.74573                  
## Total    15    79968 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $`El Castillo/Zentla`
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks:  strata 
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = aitch_loc ~ Source, data = meta_just_loc, permutations = 999, strata = meta_just_loc$Specimen)
##          Df SumOfSqs      R2      F Pr(>F)    
## Source    2    15850 0.13282 1.7614  0.001 ***
## Residual 23   103480 0.86718                  
## Total    25   119330 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $Naolinco
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks:  strata 
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = aitch_loc ~ Source, data = meta_just_loc, permutations = 999, strata = meta_just_loc$Specimen)
##          Df SumOfSqs      R2      F Pr(>F)    
## Source    2    20640 0.13271 2.0658  0.001 ***
## Residual 27   134882 0.86729                  
## Total    29   155522 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

11.3.2 Core

11.3.2.1 Aitchison

Prepare data

Explore

microbiome::summarize_phyloseq(full_ps_core_counts)
## Compositional = NO2
## 1] Min. number of reads = 32892] Max. number of reads = 1320233] Total number of reads = 22961414] Average number of reads = 31890.84722222225] Median number of reads = 26767.57] Sparsity = 0.3817351598173526] Any OTU sum to 1 or less? NO8] Number of singletons = 09] Percent of OTUs that are singletons 
##         (i.e. exactly one read detected across all samples)010] Number of sample variables are: 12sampleSourceDateYearLocationWeekRaw.readsColect_numberSpecimenquant_readingEffective_readsASVs2
## [[1]]
## [1] "1] Min. number of reads = 3289"
## 
## [[2]]
## [1] "2] Max. number of reads = 132023"
## 
## [[3]]
## [1] "3] Total number of reads = 2296141"
## 
## [[4]]
## [1] "4] Average number of reads = 31890.8472222222"
## 
## [[5]]
## [1] "5] Median number of reads = 26767.5"
## 
## [[6]]
## [1] "7] Sparsity = 0.381735159817352"
## 
## [[7]]
## [1] "6] Any OTU sum to 1 or less? NO"
## 
## [[8]]
## [1] "8] Number of singletons = 0"
## 
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n        (i.e. exactly one read detected across all samples)0"
## 
## [[10]]
## [1] "10] Number of sample variables are: 12"
## 
## [[11]]
##  [1] "sample"          "Source"          "Date"            "Year"           
##  [5] "Location"        "Week"            "Raw.reads"       "Colect_number"  
##  [9] "Specimen"        "quant_reading"   "Effective_reads" "ASVs"
#Get otu and metadata table of core phyloseq object
# extract otu table
otu_table_full_core <- otu_table(full_ps_core_counts@otu_table)
meta_full_core <- sample_data(full_ps_core_counts)
# Metadata
#meta_full_core <- data.frame(phyloseq::sample_data(pseq),check.names = FALSE)
# NMDS aichitson
library(vegan)
pseudocount <- 1
otu_table_full_core_pseudo <- otu_table_full_core + pseudocount
aitch_fc=vegdist(t(otu_table_full_core_pseudo), method = "aitchison")
nmds_source_aitch_fc = vegan::metaMDS(aitch_fc, trymax = 20, k = 2) 
## Run 0 stress 0.1321064 
## Run 1 stress 0.1427431 
## Run 2 stress 0.1794177 
## Run 3 stress 0.171119 
## Run 4 stress 0.1729933 
## Run 5 stress 0.1426311 
## Run 6 stress 0.132074 
## ... New best solution
## ... Procrustes: rmse 0.004520532  max resid 0.03590215 
## Run 7 stress 0.1681577 
## Run 8 stress 0.1748434 
## Run 9 stress 0.153449 
## Run 10 stress 0.1320773 
## ... Procrustes: rmse 0.001108172  max resid 0.007156763 
## ... Similar to previous best
## Run 11 stress 0.1427431 
## Run 12 stress 0.1321064 
## ... Procrustes: rmse 0.004522692  max resid 0.03591593 
## Run 13 stress 0.1683264 
## Run 14 stress 0.1620791 
## Run 15 stress 0.1321064 
## ... Procrustes: rmse 0.004520646  max resid 0.03591293 
## Run 16 stress 0.1519927 
## Run 17 stress 0.1426311 
## Run 18 stress 0.132074 
## ... New best solution
## ... Procrustes: rmse 0.00000686812  max resid 0.00004317669 
## ... Similar to previous best
## Run 19 stress 0.151375 
## Run 20 stress 0.158456 
## *** Best solution repeated 1 times
var_stress_nmds_aitch_fc <- round(nmds_source_aitch_fc$stress, 5)
var_stress_nmds_aitch_fc
## [1] 0.13207
scores_source_aitch_fc= nmds_source_aitch_fc %>% vegan::scores() 
meta_full_core$Source <- factor(meta_full_core$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))

nmds_plot_aitch_fc<- ggplot() +
      geom_point(data=data.frame(scores_source_aitch_fc) %>% #individuals
                 rownames_to_column(var = "sample")%>%
                 left_join(meta_full_core, by = "sample"),
               aes(x=NMDS1, y=NMDS2,  color = Source, shape = Location), size=4) +
  #  theme_linedraw()+
    scale_fill_jco()+#color of points 
    scale_color_jco()+#color of points 
    theme(axis.text = element_text(colour = "black", size = 12),
          axis.title = element_text(colour = "black", size = 12),
          legend.text = element_text(size = 10),
          legend.title = element_text(size = 12), 
          legend.position = "right", 
          legend.box = "vertical")+ theme_bw() +
  ylab("NMDS2")+xlab("NMDS1")

nmds_plot_aitch_fc <- nmds_plot_aitch_fc +
  annotate("text", x = Inf, y = -Inf, label = paste("Stress:", var_stress_nmds_aitch_fc),
           hjust = 3.1, vjust = -1.9, size = 3)
nmds_plot_aitch_fc

Permanova

meta_core_pseudo = data.frame(t(otu_table_full_core_pseudo), check.names = F) %>% 
  rownames_to_column(var = "sample") %>% inner_join(meta_full_core) 
## Joining with `by = join_by(sample)`
perm_core = adonis2(t(otu_table_full_core_pseudo) ~ Source*Location,
                     data=meta_core_pseudo, method = "aitchison", 
                     permutations = 999, strata = meta_core_pseudo$Specimen)
perm_core
meta_just = data.frame(t(otu_table_fltr_pseudo), check.names = F) %>% 
  rownames_to_column(var = "sample-id") %>% inner_join(metadata_with_hill_fltr)
## Joining with `by = join_by(`sample-id`)`
permanovas = adonis2(t(otu_table_fltr_pseudo) ~ Source*Location,
                     data=meta_just, method = "aitchison", 
                     permutations = 999, strata = meta_just$Specimen)
permanovas

12 DA

##res prim tutorial https://www.yanh.org/2021/01/01/microbiome-r/
library(ANCOMBC)
## Registered S3 methods overwritten by 'treeio':
##   method              from    
##   MRCA.phylo          tidytree
##   MRCA.treedata       tidytree
##   Nnode.treedata      tidytree
##   Ntip.treedata       tidytree
##   ancestor.phylo      tidytree
##   ancestor.treedata   tidytree
##   child.phylo         tidytree
##   child.treedata      tidytree
##   full_join.phylo     tidytree
##   full_join.treedata  tidytree
##   groupClade.phylo    tidytree
##   groupClade.treedata tidytree
##   groupOTU.phylo      tidytree
##   groupOTU.treedata   tidytree
##   is.rooted.treedata  tidytree
##   nodeid.phylo        tidytree
##   nodeid.treedata     tidytree
##   nodelab.phylo       tidytree
##   nodelab.treedata    tidytree
##   offspring.phylo     tidytree
##   offspring.treedata  tidytree
##   parent.phylo        tidytree
##   parent.treedata     tidytree
##   root.treedata       tidytree
##   rootnode.phylo      tidytree
##   sibling.phylo       tidytree
####################################################################
#Run ANCOM-BC at the Family level and only including the prevalent genera
####################################################################
set.seed(123)
output_genus_full = ancombc2(data = tse,
                         assay_name = "counts",
                         tax_level = "Genus",
                         fix_formula = "Source+Location",
                         rand_formula = "(1|Specimen)",
                         p_adj_method = "holm",
                         pseudo_sens = TRUE,
                         prv_cut = 0.10,
                         lib_cut = 1000,
                         s0_perc = 0.05,
                         group = "Source",
                         struc_zero = TRUE,
                         neg_lb = TRUE,
                         alpha = 0.05,
                         n_cl = 2,
                         verbose = TRUE,
                         global = TRUE,
                         pairwise = TRUE,
                         dunnet = TRUE,
                         trend = TRUE,
                         iter_control = list(tol = 1e-2, max_iter = 20,
                                             verbose = TRUE),
                         em_control = list(tol = 1e-5, max_iter = 100),
                         lme_control = lme4::lmerControl(),
                         mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
                         trend_control = list(contrast = list(matrix(c(1, 0, -1, 1),
                                                                     nrow = 2,
                                                                     byrow = TRUE),
                                                              matrix(c(-1, 0, 1, -1),
                                                                     nrow = 2,
                                                                     byrow = TRUE),
                                                              matrix(c(1, 0, 1, -1),
                                                                     nrow = 2,
                                                                     byrow = TRUE)),
                                              node = list(2, 2, 1),
                                              solver = "ECOS",
                                              B = 10))
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Obtaining initial estimates ...
## REML iteration = 1, epsilon = 2.3
## REML iteration = 2, epsilon = 0.61
## REML iteration = 3, epsilon = 0.36
## REML iteration = 4, epsilon = 0.21
## REML iteration = 5, epsilon = 0.13
## REML iteration = 6, epsilon = 0.079
## REML iteration = 7, epsilon = 0.049
## REML iteration = 8, epsilon = 0.031
## REML iteration = 9, epsilon = 0.019
## REML iteration = 10, epsilon = 0.012
## REML iteration = 11, epsilon = 0.0078
## Estimating sample-specific biases ...
## ANCOM-BC2 primary results ...
## The sensitivity analysis for the pseudo-count addition ...
## ANCOM-BC2 global test ...
## ANCOM-BC2 pairwise directional test ...
## ANCOM-BC2 Dunnet's type of test ...
## ANCOM-BC2 trend test ...
save.image("src/Diversity.RData")